/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
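/* (They are filled in when the compare expander runs and consumed later
   by the branch and scc expanders, which is where the comparison insn is
   actually emitted.)  */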
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
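/* (The choice tracks the reach of the IA-64 immediate forms: "adds" takes
   a 14-bit immediate, "addl" a 22-bit immediate, and "movl" a full 64-bit
   one; hence the valid -mtls-size values of 14, 22, and 64.)  */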
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -tune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int reg_save_gp;              /* save register for gp.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_use_dfa_pipeline_interface (void);
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
                                         tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (const char *name)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",           1, 1, true,  false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
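/* (The IA-64 assembler spells its data directives "data1" through "data8",
   with a ".ua" suffix for unaligned data, instead of the more common
   ".byte"/".2byte"/etc., hence the overrides above.)  */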
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
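/* (TARGET_INITIALIZER below fills in every target hook; the #undef/#define
   pairs above override just the hooks this port implements, and any hook
   not redefined keeps its default.)  */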
struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (rtx op, enum machine_mode mode)
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol in the small address area.  */

int
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return SYMBOL_REF_SMALL_ADDR_P (op);
}
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
      if (SYMBOL_REF_SMALL_ADDR_P (op))
        return 0;
      /* FALLTHRU */

    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}
/* Return 1 if OP refers to a function.  */

int
function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (rtx op, enum machine_mode mode)
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}
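/* (These are exactly the increment values that the IA-64 fetchadd4 and
   fetchadd8 instructions can encode.)  */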
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (rtx op, enum machine_mode mode)
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (register rtx op, enum machine_mode mode)
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (rtx op, enum machine_mode mode)
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (rtx op, enum machine_mode mode)
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (rtx op, enum machine_mode mode)
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     here.  */

  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl, decl);
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("%Jaddress area of '%s' conflicts with previous "
                 "declaration", decl, decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
             decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
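/* (These particular constants can be stored to memory directly because
   GR r0 always reads as zero and FR f0/f1 read as 0.0/1.0, so the value
   never needs to be loaded into a scratch register first.)  */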
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
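/* (For example, ROP = 0xff0 with RSHIFT = 4: 0xff0 >> 4 = 0xff, and
   exact_log2 (0x100) = 8, an 8-bit field.  A non-contiguous mask such as
   0xf0f fails: 0xf0f >> 4 = 0xf0, and exact_log2 (0xf1) = -1.)  */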
/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
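      /* (For example, ofs = 0x3000 splits into lo = -0x1000 and
         hi = 0x4000: hi has its low 14 bits clear, and the final
         addition of LO fits the 14-bit "adds" immediate.)  */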
      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
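      /* (r13 is "tp", the thread pointer, in the IA-64 software
         conventions; it is a fixed register, so the REG is safe to
         cache.)  */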
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
        }
      else
        emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
        {
          ia64_expand_load_address (op0, op1);
          return NULL_RTX;
        }
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (rtx out[2], rtx in, rtx scratch)
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = adjust_address (in, DImode, 0);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
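/* (Callers embed the expression returned above in a branch or conditional
   move pattern; BImode is a one-bit mode that maps onto an IA-64 predicate
   register, so CMP ultimately names a predicate.)  */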
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}
void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
      else
        {
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
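  /* (An IA-64 function pointer addresses a function descriptor: two
     8-byte words holding the entry address and the callee's GP value.
     The loads below pick those two words apart.)  */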
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
        = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
        {
          SET_HARD_REG_BIT (mask, GR_REG (1));
          spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }

      if (regs_ever_live[AR_PFS_REGNUM])
        {
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
          if (current_frame_info.reg_save_ar_pfs == 0)
            {
              extra_spill_size += 8;
              n_spilled += 1;
            }
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    default:
      abort ();
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  /* Use a second iterator only when there are enough spills to keep
     two memory streams busy.  */
  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  /* Rewrite the previous reference through this iterator into a
	     POST_MODIFY that steps it to the current offset.  */
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					      REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
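
/* Illustrative sketch of the interleaving (not literal compiler
   output): spilling four registers at CFA offsets 32, 24, 16 and 8
   with two iterators r2 and r3 ends up, after the POST_MODIFY fixups
   above, looking roughly like

     st8 [r2] = rA, 16	;; r2 steps from the slot at CFA 32 to CFA 16
     st8 [r3] = rB, 16	;; r3 steps from the slot at CFA 24 to CFA 8
     st8 [r2] = rC
     st8 [r3] = rD

   so consecutive saves use independent address registers and can
   issue in the same insn group.  */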
static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
	  rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
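
/* Usage sketch (mirrors the calls made in the prologue below): an FR
   register and a GR copy of b0 go through the same interface,

     do_spill (gen_fr_spill_x, reg, cfa_off, reg);     fr.spill insn
     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);    plain st8

   and the *_x wrappers simply drop the offset operand that only the
   gr_spill/gr_fill patterns actually consume.  */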
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

   [ varargs spill area ]
   [ fr register spill area ]
   [ br register spill area ]
   [ ar register spill area ]
   [ pr register spill area ]
   [ gr register spill area ] */
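
/* Illustration of how the prologue walks this area (assumed numbers):
   with spill_cfa_off == -16, spill_size == 16 and extra_spill_size == 8,
   spilling starts at

     cfa_off = -16 + 16 + 8 = 8

   and moves down one slot per save; the consistency checks against
   spill_cfa_off + spill_size and against spill_cfa_off below verify
   that every slot was accounted for.  */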
/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
void
ia64_expand_prologue (void)
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;
  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }
  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);
  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.reg_save_ar_pfs == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }
  if (current_frame_info.reg_save_gp)
    {
      insn = emit_move_insn (gen_rtx_REG (DImode,
					  current_frame_info.reg_save_gp),
			     pic_offset_table_rtx);
      /* We don't know for sure yet if this is actually needed, since
	 we've not split the PIC call patterns.  If all of the calls
	 are indirect, and not followed by any uses of the gp, then
	 this save is dead.  Allow it to go away.  */
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}
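
/* Putting it together, the emitted prologue for a small non-leaf
   function corresponds roughly to assembly of this shape (an
   illustrative sketch with assumed register numbers, not actual
   compiler output):

     alloc r34 = ar.pfs, 2, 3, 1, 0	;; in, loc, out, rot
     mov r33 = r12			;; frame pointer copy, if needed
     adds r12 = -32, r12		;; stack adjustment
     ;;					;; gen_blockage hard stop
     mov r35 = b0			;; save return address
     st8 [r2] = r4, 8			;; saves via the spill iterators

   with the matching unwind directives emitted separately by
   ia64_output_function_prologue below.  */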
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue(), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (int sibcall_p)
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);
  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;
  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throwaway register here, r0 and r1 are reserved, so r2 is
	 the first available call clobbered register.  If there was a
	 frame_pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
	 string "r2" when emitting the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return (void)
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}
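
/* For instance, a leaf function that fits entirely in scratch
   registers and allocates no frame satisfies every test above, so its
   whole epilogue degenerates to a single "br.ret.sptk.many b0" (an
   illustrative consequence of the checks, not a separate code path).  */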
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL;
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
}

/* Split this value after reload, now that we know where the return
   address is saved.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  HOST_WIDE_INT off;
	  unsigned int regno;

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	      off -= 8;

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	  else
	    {
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;
	    }

	  /* Load address into scratch register.  */
	  if (CONST_OK_FOR_I (off))
	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
	  else
	    {
	      emit_move_insn (dest, GEN_INT (off));
	      emit_insn (gen_adddi3 (dest, src, dest));
	    }

	  src = gen_rtx_MEM (Pmode, dest);
	}
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
int
ia64_hard_regno_rename_ok (int from, int to)
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == (TARGET_ILP32 ? 4 : 8)
      && aligned_p
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      if (TARGET_ILP32)
	fputs ("\tdata4\t@fptr(", asm_out_file);
      else
	fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
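
/* Example of the effect (made-up symbol name): emitting the address
   of a function "foo" in static data produces

     data8	@fptr(foo)

   rather than a raw "data8 foo", so the linker substitutes an
   official function descriptor for the bare code address.  */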
/* Emit the function prologue.  */

static void
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
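
/* Sample of the directives this emits (register numbers assumed for
   illustration): a function saving b0 and ar.pfs in consecutive GRs
   whose DBX numbers start at 34 gets

     .regstk 2, 3, 1, 0
     .prologue 12, 34

   where the mask 12 = 8|4 flags the rp and ar.pfs save slots for the
   unwinder.  */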
/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (FILE *file)
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int i;

  /* Undo the frame pointer name swap done in ia64_expand_prologue.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (int regno)
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
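
/* Worked example (counts assumed for illustration): with 2 input and
   3 local registers, in1 maps to DBX number 33, loc2 to 32 + 2 + 2 = 36
   and out0 to 32 + 2 + 3 = 37, i.e. the stacked registers are numbered
   densely in in/loc/out order regardless of their hard regnos.  */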
void
ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
{
  rtx addr_reg, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
  if (!TARGET_GNU_AS)
    {
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	{
	  declared_ia64_trampoline = true;
	  fputs ("\t.global\t__ia64_trampoline\n", asm_out_file);
	}
    }

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
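
/* The resulting 32-byte block, in memory-order sketch form:

     [ADDR+ 0]  __ia64_trampoline	\  fake function descriptor
     [ADDR+ 8]  ADDR + 16		/
     [ADDR+16]  FNADDR			target descriptor word
     [ADDR+24]  STATIC_CHAIN

   Callers treat ADDR as an ordinary function descriptor; in outline,
   the shared __ia64_trampoline stub receives ADDR+16 as its gp value
   and from there picks up the real target and the static chain.  */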
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument, which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (CUMULATIVE_ARGS cum, int int_mode, tree type,
			     int * pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
{
  /* Skip the current argument.  */
  ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (tree type, int nested)
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }
}
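
/* Examples under the rules above: "struct { float x, y, z; }" is an
   HFA with element mode SFmode; a "double[4]" nested in a struct is
   an HFA of DFmode; "struct { float f; double d; }" is not an HFA
   because the leaf modes differ; and 128-bit quad-precision leaves
   only qualify when INTEL_EXTENDED_IEEE_FORMAT holds.  */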
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
		   int named, int incoming)
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;
	  /* Complex floats need to have float mode.  */
	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	    gr_mode = hfa_mode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
		       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
	{
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
							+ offset)),
					  const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
	}
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
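
/* Illustration of the HFA path above (assumed call state): a named
   "struct { float a, b, c; }" passed to a prototyped callee with
   cum->words == 0 and cum->fp_regs == 0 yields

     (parallel [f8 at offset 0] [f9 at offset 4] [f10 at offset 8])

   i.e. one SFmode FR register per leaf; without a prototype the same
   bytes would additionally be duplicated into GR argument regs.  */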
/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   on the stack.  */

int
ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
				 tree type, int named ATTRIBUTE_UNUSED)
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			   tree type, int named)
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
/* Variable sized types are passed by reference.  */
/* ??? At present this is a GCC extension to the IA-64 ABI.  */

int
ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
				     enum machine_mode mode ATTRIBUTE_UNUSED,
				     tree type, int named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

static bool
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module.  */
  return decl && (*targetm.binds_local_p) (decl);
}
/* Implement va_arg.  */

rtx
ia64_va_arg (tree valist, tree type)
{
  tree t;

  /* Variable sized types are passed by reference.  */
  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
    {
      rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
      return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
    }

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
/* Return 1 if the function's return value is returned in memory.  Return 0
   if it is returned in a register.  */

int
ia64_return_in_memory (tree valtype)
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
    {
      byte_size = int_size_in_bytes (valtype);
      if (byte_size < 0)
	return 1;
    }

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      return 0;
    }
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}
/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype)
	   && ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	{
	  rtx loc[8];
	  int offset;
	  int bytesize;
	  int i;

	  offset = 0;
	  bytesize = int_size_in_bytes (valtype);
	  for (i = 0; offset < bytesize; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       GR_RET_FIRST + i),
					  GEN_INT (offset));
	      offset += UNITS_PER_WORD;
	    }
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
	}
      else
	return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  if (size != 8)
    abort ();
  fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);
  fputs (")", file);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    rtx address ATTRIBUTE_UNUSED)
{
}
/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.  */
void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names [REGNO (x)];
      else
	abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else if (GET_CODE (x) == REG)
	      {
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    else
	      abort ();
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50)
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";
	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      switch (outer_code)
	{
	case SET:
	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
	  return true;
	case PLUS:
	  if (CONST_OK_FOR_I (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_J (INTVAL (x)))
	    *total = 1;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	default:
	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
	 which normally involves copies.  Plus there's the latency
	 of the multiply itself, and the latency of the instructions to
	 transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
	*total = COSTS_N_INSNS (10);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
	 optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      return false;
    }
}
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

static int
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
			 enum reg_class to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      enum reg_class tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in TFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == TFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case GR_REGS:
    case FR_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      abort ();
    }

  return 2;
}
4240 /* This function returns the register class required for a secondary
4241 register when copying between one of the registers in CLASS, and X,
4242 using MODE. A return value of NO_REGS means that no secondary register
4246 ia64_secondary_reload_class (enum reg_class class,
4247 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4251 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4252 regno = true_regnum (x);
4259 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4260 interaction. We end up with two pseudos with overlapping lifetimes
4261 both of which are equiv to the same constant, and both which need
4262 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4263 changes depending on the path length, which means the qty_first_reg
4264 check in make_regs_eqv can give different answers at different times.
4265 At some point I'll probably need a reload_indi pattern to handle
4268 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4269 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4270 non-general registers for good measure. */
4271 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4274 /* This is needed if a pseudo used as a call_operand gets spilled to a stack slot. */
4276 if (GET_CODE (x) == MEM)
4281 /* Need to go through general registers to get to other class regs. */
4282 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4285 /* This can happen when a paradoxical subreg is an operand to the muldi3 pattern. */
4287 /* ??? This shouldn't be necessary after instruction scheduling is
4288 enabled, because paradoxical subregs are not accepted by
4289 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4290 stop the paradoxical subreg stupidity in the *_operand functions in recog.c. */
4292 if (GET_CODE (x) == MEM
4293 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4294 || GET_MODE (x) == QImode))
4297 /* This can happen because of the ior/and/etc patterns that accept FP
4298 registers as operands. If the third operand is a constant, then it
4299 needs to be reloaded into a FP register. */
4300 if (GET_CODE (x) == CONST_INT)
4303 /* This can happen because of register elimination in a muldi3 insn.
4304 E.g. `26107 * (unsigned long)&u'. */
4305 if (GET_CODE (x) == PLUS)
4310 /* ??? This happens if we cse/gcse a BImode value across a call,
4311 and the function has a nonlocal goto. This is because global
4312 does not allocate call crossing pseudos to hard registers when
4313 current_function_has_nonlocal_goto is true. This is relatively
4314 common for C++ programs that use exceptions. To reproduce,
4315 return NO_REGS and compile libstdc++. */
4316 if (GET_CODE (x) == MEM)
4319 /* This can happen when we take a BImode subreg of a DImode value,
4320 and that DImode value winds up in some non-GR register. */
4321 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4326 /* Since we have no offsettable memory addresses, we need a temporary
4327 to hold the address of the second word. */
4340 /* Emit text to declare externally defined variables and functions, because
4341 the Intel assembler does not support undefined externals. */
4344 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4346 int save_referenced;
4348 /* GNU as does not need anything here, but the HP linker does need
4349 something for external functions. */
4353 || TREE_CODE (decl) != FUNCTION_DECL
4354 || strstr (name, "__builtin_") == name))
4357 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4358 the linker when we do this, so we need to be careful not to do this for
4359 builtin functions which have no library equivalent. Unfortunately, we
4360 can't tell here whether or not a function will actually be called by
4361 expand_expr, so we pull in library functions even if we may not need them. */
4363 if (! strcmp (name, "__builtin_next_arg")
4364 || ! strcmp (name, "alloca")
4365 || ! strcmp (name, "__builtin_constant_p")
4366 || ! strcmp (name, "__builtin_args_info"))
4370 ia64_hpux_add_extern_decl (name);
4373 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */
4375 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4376 if (TREE_CODE (decl) == FUNCTION_DECL)
4377 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4378 (*targetm.asm_out.globalize_label) (file, name);
4379 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4383 /* Parse the -mfixed-range= option string. */
4386 fix_range (const char *const_str)
4389 char *str, *dash, *comma;
4391 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4392 REG2 are either register names or register numbers. The effect
4393 of this option is to mark the registers in the range from REG1 to
4394 REG2 as ``fixed'' so they won't be used by the compiler. This is
4395 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4397 i = strlen (const_str);
4398 str = (char *) alloca (i + 1);
4399 memcpy (str, const_str, i + 1);
4403 dash = strchr (str, '-');
4406 warning ("value of -mfixed-range must have form REG1-REG2");
4411 comma = strchr (dash + 1, ',');
4415 first = decode_reg_name (str);
4418 warning ("unknown register name: %s", str);
4422 last = decode_reg_name (dash + 1);
4425 warning ("unknown register name: %s", dash + 1);
4433 warning ("%s-%s is an empty range", str, dash + 1);
4437 for (i = first; i <= last; ++i)
4438 fixed_regs[i] = call_used_regs[i] = 1;
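/* Example usage (hypothetical command lines): -mfixed-range=f32-f127
   makes the loop above mark f32 through f127 as fixed and call-used,
   so generated code only touches f0-f31, while
   -mfixed-range=f32-f63,f96-f127 would fix two separate ranges.  */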
4448 static struct machine_function *
4449 ia64_init_machine_status (void)
4451 return ggc_alloc_cleared (sizeof (struct machine_function));
4454 /* Handle TARGET_OPTIONS switches. */
4457 ia64_override_options (void)
4461 const char *const name; /* processor name or nickname. */
4462 const enum processor_type processor;
4464 const processor_alias_table[] =
4466 {"itanium", PROCESSOR_ITANIUM},
4467 {"itanium1", PROCESSOR_ITANIUM},
4468 {"merced", PROCESSOR_ITANIUM},
4469 {"itanium2", PROCESSOR_ITANIUM2},
4470 {"mckinley", PROCESSOR_ITANIUM2},
4473 int const pta_size = ARRAY_SIZE (processor_alias_table);
4476 if (TARGET_AUTO_PIC)
4477 target_flags |= MASK_CONST_GP;
4479 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4481 warning ("cannot optimize floating point division for both latency and throughput");
4482 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4485 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4487 warning ("cannot optimize integer division for both latency and throughput");
4488 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4491 if (ia64_fixed_range_string)
4492 fix_range (ia64_fixed_range_string);
4494 if (ia64_tls_size_string)
4497 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4498 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4499 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4501 ia64_tls_size = tmp;
4504 if (!ia64_tune_string)
4505 ia64_tune_string = "itanium2";
4507 for (i = 0; i < pta_size; i++)
4508 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4510 ia64_tune = processor_alias_table[i].processor;
4515 error ("bad value (%s) for -mtune= switch", ia64_tune_string);
4517 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4518 flag_schedule_insns_after_reload = 0;
4520 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4522 init_machine_status = ia64_init_machine_status;
4524 /* Tell the compiler which flavor of TFmode we're using. */
4525 if (!INTEL_EXTENDED_IEEE_FORMAT)
4526 REAL_MODE_FORMAT (TFmode) = &ieee_quad_format;
4529 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4530 static enum attr_type ia64_safe_type (rtx);
4532 static enum attr_itanium_class
4533 ia64_safe_itanium_class (rtx insn)
4535 if (recog_memoized (insn) >= 0)
4536 return get_attr_itanium_class (insn);
4538 return ITANIUM_CLASS_UNKNOWN;
4541 static enum attr_type
4542 ia64_safe_type (rtx insn)
4544 if (recog_memoized (insn) >= 0)
4545 return get_attr_type (insn);
4547 return TYPE_UNKNOWN;
4550 /* The following collection of routines emit instruction group stop bits as
4551 necessary to avoid dependencies. */
4553 /* We need to track some additional registers as far as serialization is
4554 concerned so we can properly handle br.call and br.ret. We could
4555 make these registers visible to gcc, but since these registers are
4556 never explicitly used in gcc generated code, it seems wasteful to
4557 do so (plus it would make the call and return patterns needlessly complex). */
4559 #define REG_GP (GR_REG (1))
4560 #define REG_RP (BR_REG (0))
4561 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4562 /* This is used for volatile asms which may require a stop bit immediately
4563 before and after them. */
4564 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4565 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4566 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4568 /* For each register, we keep track of how it has been written in the
4569 current instruction group.
4571 If a register is written unconditionally (no qualifying predicate),
4572 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4574 If a register is written if its qualifying predicate P is true, we
4575 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4576 may be written again by the complement of P (P^1) and when this happens,
4577 WRITE_COUNT gets set to 2.
4579 The result of this is that whenever an insn attempts to write a register
4580 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4582 If a predicate register is written by a floating-point insn, we set
4583 WRITTEN_BY_FP to true.
4585 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4586 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
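/* A worked (hypothetical) example, assuming p6 and p7 form a
   complementary predicate pair:

	(p6) mov r16 = r14	// WRITE_COUNT (r16) = 1, FIRST_PRED = p6
	(p7) mov r16 = r15	// complement of p6: WRITE_COUNT (r16) = 2
	(p6) mov r16 = r17	// WRITE_COUNT (r16) is already 2

   By the rules above, the third write must be preceded by an insn
   group barrier.  */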
4588 struct reg_write_state
4590 unsigned int write_count : 2;
4591 unsigned int first_pred : 16;
4592 unsigned int written_by_fp : 1;
4593 unsigned int written_by_and : 1;
4594 unsigned int written_by_or : 1;
4597 /* Cumulative info for the current instruction group. */
4598 struct reg_write_state rws_sum[NUM_REGS];
4599 /* Info for the current instruction. This gets copied to rws_sum after a
4600 stop bit is emitted. */
4601 struct reg_write_state rws_insn[NUM_REGS];
4603 /* Indicates whether this is the first instruction after a stop bit,
4604 in which case we don't need another stop bit. Without this, we hit
4605 the abort in ia64_variable_issue when scheduling an alloc. */
4606 static int first_instruction;
4608 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4609 RTL for one instruction. */
4612 unsigned int is_write : 1; /* Is register being written? */
4613 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4614 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4615 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4616 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4617 unsigned int is_sibcall : 1; /* Is this a sibling call (rather than a normal call)? */
4620 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4621 static int rws_access_regno (int, struct reg_flags, int);
4622 static int rws_access_reg (rtx, struct reg_flags, int);
4623 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4624 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4625 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4626 static void init_insn_group_barriers (void);
4627 static int group_barrier_needed_p (rtx);
4628 static int safe_group_barrier_needed_p (rtx);
4630 /* Update *RWS for REGNO, which is being written by the current instruction,
4631 with predicate PRED, and associated register flags in FLAGS. */
4634 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4637 rws[regno].write_count++;
4639 rws[regno].write_count = 2;
4640 rws[regno].written_by_fp |= flags.is_fp;
4641 /* ??? Not tracking and/or across differing predicates. */
4642 rws[regno].written_by_and = flags.is_and;
4643 rws[regno].written_by_or = flags.is_or;
4644 rws[regno].first_pred = pred;
4647 /* Handle an access to register REGNO of type FLAGS using predicate register
4648 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4649 a dependency with an earlier instruction in the same group. */
4652 rws_access_regno (int regno, struct reg_flags flags, int pred)
4654 int need_barrier = 0;
4656 if (regno >= NUM_REGS)
4659 if (! PR_REGNO_P (regno))
4660 flags.is_and = flags.is_or = 0;
4666 /* One insn writes same reg multiple times? */
4667 if (rws_insn[regno].write_count > 0)
4670 /* Update info for current instruction. */
4671 rws_update (rws_insn, regno, flags, pred);
4672 write_count = rws_sum[regno].write_count;
4674 switch (write_count)
4677 /* The register has not been written yet. */
4678 rws_update (rws_sum, regno, flags, pred);
4682 /* The register has been written via a predicate. If this is
4683 not a complementary predicate, then we need a barrier. */
4684 /* ??? This assumes that P and P+1 are always complementary
4685 predicates for P even. */
4686 if (flags.is_and && rws_sum[regno].written_by_and)
4688 else if (flags.is_or && rws_sum[regno].written_by_or)
4690 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4692 rws_update (rws_sum, regno, flags, pred);
4696 /* The register has been unconditionally written already. We need a barrier. */
4698 if (flags.is_and && rws_sum[regno].written_by_and)
4700 else if (flags.is_or && rws_sum[regno].written_by_or)
4704 rws_sum[regno].written_by_and = flags.is_and;
4705 rws_sum[regno].written_by_or = flags.is_or;
4714 if (flags.is_branch)
4716 /* Branches have several RAW exceptions that allow us to avoid barriers. */
4719 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4720 /* RAW dependencies on branch regs are permissible as long
4721 as the writer is a non-branch instruction. Since we
4722 never generate code that uses a branch register written
4723 by a branch instruction, handling this case is easy. */
4727 if (REGNO_REG_CLASS (regno) == PR_REGS
4728 && ! rws_sum[regno].written_by_fp)
4729 /* The predicates of a branch are available within the
4730 same insn group as long as the predicate was written by
4731 something other than a floating-point instruction. */
4735 if (flags.is_and && rws_sum[regno].written_by_and)
4737 if (flags.is_or && rws_sum[regno].written_by_or)
4740 switch (rws_sum[regno].write_count)
4743 /* The register has not been written yet. */
4747 /* The register has been written via a predicate. If this is
4748 not a complementary predicate, then we need a barrier. */
4749 /* ??? This assumes that P and P+1 are always complementary
4750 predicates for P even. */
4751 if ((rws_sum[regno].first_pred ^ 1) != pred)
4756 /* The register has been unconditionally written already. We need a barrier. */
4766 return need_barrier;
4770 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4772 int regno = REGNO (reg);
4773 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4776 return rws_access_regno (regno, flags, pred);
4779 int need_barrier = 0;
4781 need_barrier |= rws_access_regno (regno + n, flags, pred);
4782 return need_barrier;
4786 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4787 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
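/* For reference, an ia64 conditional move arrives here roughly in the
   shape below (a schematic, not a verbatim RTL dump):

	(set (reg:DI r16)
	     (if_then_else (ne (reg:BI p6) (const_int 0))
			   (reg:DI r14)
			   (reg:DI r15)))

   so in the code below XEXP (src, 0) is the NE/EQ test and
   XEXP (XEXP (src, 0), 0) is the predicate register.  */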
4790 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4792 rtx src = SET_SRC (x);
4796 switch (GET_CODE (src))
4802 if (SET_DEST (x) == pc_rtx)
4803 /* X is a conditional branch. */
4807 int is_complemented = 0;
4809 /* X is a conditional move. */
4810 rtx cond = XEXP (src, 0);
4811 if (GET_CODE (cond) == EQ)
4812 is_complemented = 1;
4813 cond = XEXP (cond, 0);
4814 if (GET_CODE (cond) != REG
4815 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4818 if (XEXP (src, 1) == SET_DEST (x)
4819 || XEXP (src, 2) == SET_DEST (x))
4821 /* X is a conditional move that conditionally writes the destination. */
4824 /* We need another complement in this case. */
4825 if (XEXP (src, 1) == SET_DEST (x))
4826 is_complemented = ! is_complemented;
4828 *ppred = REGNO (cond);
4829 if (is_complemented)
4833 /* ??? If this is a conditional write to the dest, then this
4834 instruction does not actually read one source. This probably
4835 doesn't matter, because that source is also the dest. */
4836 /* ??? Multiple writes to predicate registers are allowed
4837 if they are all AND type compares, or if they are all OR
4838 type compares. We do not generate such instructions currently. */
4841 /* ... fall through ... */
4844 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4845 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4846 /* Set pflags->is_fp to 1 so that we know we're dealing
4847 with a floating point comparison when processing the
4848 destination of the SET. */
4851 /* Discover if this is a parallel comparison. We only handle
4852 and.orcm and or.andcm at present, since we must retain a
4853 strict inverse on the predicate pair. */
4854 else if (GET_CODE (src) == AND)
4856 else if (GET_CODE (src) == IOR)
4863 /* Subroutine of rtx_needs_barrier; this function determines whether the
4864 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4865 are as in rtx_needs_barrier. COND is an rtx that holds the condition for this insn. */
4869 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
4871 int need_barrier = 0;
4873 rtx src = SET_SRC (x);
4875 if (GET_CODE (src) == CALL)
4876 /* We don't need to worry about the result registers that
4877 get written by a subroutine call. */
4878 return rtx_needs_barrier (src, flags, pred);
4879 else if (SET_DEST (x) == pc_rtx)
4881 /* X is a conditional branch. */
4882 /* ??? This seems redundant, as the caller sets this bit for all JUMP_INSNs. */
4884 flags.is_branch = 1;
4885 return rtx_needs_barrier (src, flags, pred);
4888 need_barrier = rtx_needs_barrier (src, flags, pred);
4890 /* This instruction unconditionally uses a predicate register. */
4892 need_barrier |= rws_access_reg (cond, flags, 0);
4895 if (GET_CODE (dst) == ZERO_EXTRACT)
4897 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4898 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4899 dst = XEXP (dst, 0);
4901 return need_barrier;
4904 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4905 Return 1 if this access creates a dependency with an earlier instruction
4906 in the same group. */
4909 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
4912 int is_complemented = 0;
4913 int need_barrier = 0;
4914 const char *format_ptr;
4915 struct reg_flags new_flags;
4923 switch (GET_CODE (x))
4926 update_set_flags (x, &new_flags, &pred, &cond);
4927 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4928 if (GET_CODE (SET_SRC (x)) != CALL)
4930 new_flags.is_write = 1;
4931 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4936 new_flags.is_write = 0;
4937 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4939 /* Avoid multiple register writes, in case this is a pattern with
4940 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4941 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4943 new_flags.is_write = 1;
4944 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4945 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4946 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4951 /* X is a predicated instruction. */
4953 cond = COND_EXEC_TEST (x);
4956 need_barrier = rtx_needs_barrier (cond, flags, 0);
4958 if (GET_CODE (cond) == EQ)
4959 is_complemented = 1;
4960 cond = XEXP (cond, 0);
4961 if (GET_CODE (cond) != REG
4962 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4964 pred = REGNO (cond);
4965 if (is_complemented)
4968 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4969 return need_barrier;
4973 /* Clobber & use are for earlier compiler phases only. */
4978 /* We always emit stop bits for traditional asms. We emit stop bits
4979 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4980 if (GET_CODE (x) != ASM_OPERANDS
4981 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4983 /* Avoid writing the register multiple times if we have multiple
4984 asm outputs. This avoids an abort in rws_access_reg. */
4985 if (! rws_insn[REG_VOLATILE].write_count)
4987 new_flags.is_write = 1;
4988 rws_access_regno (REG_VOLATILE, new_flags, pred);
4993 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4994 We cannot just fall through here, since then we would be confused
4995 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4996 a traditional asm, unlike its normal usage. */
4998 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4999 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5004 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5006 rtx pat = XVECEXP (x, 0, i);
5007 if (GET_CODE (pat) == SET)
5009 update_set_flags (pat, &new_flags, &pred, &cond);
5010 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5012 else if (GET_CODE (pat) == USE
5013 || GET_CODE (pat) == CALL
5014 || GET_CODE (pat) == ASM_OPERANDS)
5015 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5016 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5019 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5021 rtx pat = XVECEXP (x, 0, i);
5022 if (GET_CODE (pat) == SET)
5024 if (GET_CODE (SET_SRC (pat)) != CALL)
5026 new_flags.is_write = 1;
5027 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5031 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5032 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5040 if (REGNO (x) == AR_UNAT_REGNUM)
5042 for (i = 0; i < 64; ++i)
5043 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5046 need_barrier = rws_access_reg (x, flags, pred);
5050 /* Find the regs used in memory address computation. */
5051 new_flags.is_write = 0;
5052 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5055 case CONST_INT: case CONST_DOUBLE:
5056 case SYMBOL_REF: case LABEL_REF: case CONST:
5059 /* Operators with side-effects. */
5060 case POST_INC: case POST_DEC:
5061 if (GET_CODE (XEXP (x, 0)) != REG)
5064 new_flags.is_write = 0;
5065 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5066 new_flags.is_write = 1;
5067 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5071 if (GET_CODE (XEXP (x, 0)) != REG)
5074 new_flags.is_write = 0;
5075 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5076 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5077 new_flags.is_write = 1;
5078 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5081 /* Handle common unary and binary ops for efficiency. */
5082 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5083 case MOD: case UDIV: case UMOD: case AND: case IOR:
5084 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5085 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5086 case NE: case EQ: case GE: case GT: case LE:
5087 case LT: case GEU: case GTU: case LEU: case LTU:
5088 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5089 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5092 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5093 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5094 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5095 case SQRT: case FFS: case POPCOUNT:
5096 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5100 switch (XINT (x, 1))
5102 case UNSPEC_LTOFF_DTPMOD:
5103 case UNSPEC_LTOFF_DTPREL:
5105 case UNSPEC_LTOFF_TPREL:
5107 case UNSPEC_PRED_REL_MUTEX:
5108 case UNSPEC_PIC_CALL:
5110 case UNSPEC_FETCHADD_ACQ:
5111 case UNSPEC_BSP_VALUE:
5112 case UNSPEC_FLUSHRS:
5113 case UNSPEC_BUNDLE_SELECTOR:
5116 case UNSPEC_GR_SPILL:
5117 case UNSPEC_GR_RESTORE:
5119 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5120 HOST_WIDE_INT bit = (offset >> 3) & 63;
5122 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5123 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5124 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5129 case UNSPEC_FR_SPILL:
5130 case UNSPEC_FR_RESTORE:
5131 case UNSPEC_GETF_EXP:
5133 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5136 case UNSPEC_FR_RECIP_APPROX:
5137 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5138 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5141 case UNSPEC_CMPXCHG_ACQ:
5142 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5143 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5151 case UNSPEC_VOLATILE:
5152 switch (XINT (x, 1))
5155 /* Alloc must always be the first instruction of a group.
5156 We force this by always returning true. */
5157 /* ??? We might get better scheduling if we explicitly check for
5158 input/local/output register dependencies, and modify the
5159 scheduler so that alloc is always reordered to the start of
5160 the current group. We could then eliminate all of the
5161 first_instruction code. */
5162 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5164 new_flags.is_write = 1;
5165 rws_access_regno (REG_AR_CFM, new_flags, pred);
5168 case UNSPECV_SET_BSP:
5172 case UNSPECV_BLOCKAGE:
5173 case UNSPECV_INSN_GROUP_BARRIER:
5175 case UNSPECV_PSAC_ALL:
5176 case UNSPECV_PSAC_NORMAL:
5185 new_flags.is_write = 0;
5186 need_barrier = rws_access_regno (REG_RP, flags, pred);
5187 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5189 new_flags.is_write = 1;
5190 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5191 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5195 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5196 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5197 switch (format_ptr[i])
5199 case '0': /* unused field */
5200 case 'i': /* integer */
5201 case 'n': /* note */
5202 case 'w': /* wide integer */
5203 case 's': /* pointer to string */
5204 case 'S': /* optional pointer to string */
5208 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5213 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5214 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5223 return need_barrier;
5226 /* Clear out the state for group_barrier_needed_p at the start of a
5227 sequence of insns. */
5230 init_insn_group_barriers (void)
5232 memset (rws_sum, 0, sizeof (rws_sum));
5233 first_instruction = 1;
5236 /* Given the current state, recorded by previous calls to this function,
5237 determine whether a group barrier (a stop bit) is necessary before INSN.
5238 Return nonzero if so. */
5241 group_barrier_needed_p (rtx insn)
5244 int need_barrier = 0;
5245 struct reg_flags flags;
5247 memset (&flags, 0, sizeof (flags));
5248 switch (GET_CODE (insn))
5254 /* A barrier doesn't imply an instruction group boundary. */
5258 memset (rws_insn, 0, sizeof (rws_insn));
5262 flags.is_branch = 1;
5263 flags.is_sibcall = SIBLING_CALL_P (insn);
5264 memset (rws_insn, 0, sizeof (rws_insn));
5266 /* Don't bundle a call following another call. */
5267 if ((pat = prev_active_insn (insn))
5268 && GET_CODE (pat) == CALL_INSN)
5274 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5278 flags.is_branch = 1;
5280 /* Don't bundle a jump following a call. */
5281 if ((pat = prev_active_insn (insn))
5282 && GET_CODE (pat) == CALL_INSN)
5290 if (GET_CODE (PATTERN (insn)) == USE
5291 || GET_CODE (PATTERN (insn)) == CLOBBER)
5292 /* Don't care about USE and CLOBBER "insns"---those are used to
5293 indicate to the optimizer that it shouldn't get rid of
5294 certain operations. */
5297 pat = PATTERN (insn);
5299 /* Ug. Hack hacks hacked elsewhere. */
5300 switch (recog_memoized (insn))
5302 /* We play dependency tricks with the epilogue in order
5303 to get proper schedules. Undo this for dv analysis. */
5304 case CODE_FOR_epilogue_deallocate_stack:
5305 case CODE_FOR_prologue_allocate_stack:
5306 pat = XVECEXP (pat, 0, 0);
5309 /* The pattern we use for br.cloop confuses the code above.
5310 The second element of the vector is representative. */
5311 case CODE_FOR_doloop_end_internal:
5312 pat = XVECEXP (pat, 0, 1);
5315 /* Doesn't generate code. */
5316 case CODE_FOR_pred_rel_mutex:
5317 case CODE_FOR_prologue_use:
5324 memset (rws_insn, 0, sizeof (rws_insn));
5325 need_barrier = rtx_needs_barrier (pat, flags, 0);
5327 /* Check to see if the previous instruction was a volatile asm. */
5330 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5337 if (first_instruction && INSN_P (insn)
5338 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5339 && GET_CODE (PATTERN (insn)) != USE
5340 && GET_CODE (PATTERN (insn)) != CLOBBER)
5343 first_instruction = 0;
5346 return need_barrier;
5349 /* Like group_barrier_needed_p, but do not clobber the current state. */
5352 safe_group_barrier_needed_p (rtx insn)
5354 struct reg_write_state rws_saved[NUM_REGS];
5355 int saved_first_instruction;
5358 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5359 saved_first_instruction = first_instruction;
5361 t = group_barrier_needed_p (insn);
5363 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5364 first_instruction = saved_first_instruction;
5369 /* Scan the current function and insert stop bits as necessary to
5370 eliminate dependencies. This function assumes that a final
5371 instruction scheduling pass has been run which has already
5372 inserted most of the necessary stop bits. This function only
5373 inserts new ones at basic block boundaries, since these are
5374 invisible to the scheduler. */
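/* In the assembly output an insn group barrier becomes a stop bit,
   written ";;".  A hypothetical group split:

	add r14 = r15, r16
	;;
	ld8 r17 = [r14]

   Without the stop bit the ld8 would read r14 in the same insn group
   that writes it, violating the RAW rules this file enforces.  */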
5377 emit_insn_group_barriers (FILE *dump)
5381 int insns_since_last_label = 0;
5383 init_insn_group_barriers ();
5385 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5387 if (GET_CODE (insn) == CODE_LABEL)
5389 if (insns_since_last_label)
5391 insns_since_last_label = 0;
5393 else if (GET_CODE (insn) == NOTE
5394 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5396 if (insns_since_last_label)
5398 insns_since_last_label = 0;
5400 else if (GET_CODE (insn) == INSN
5401 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5402 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5404 init_insn_group_barriers ();
5407 else if (INSN_P (insn))
5409 insns_since_last_label = 1;
5411 if (group_barrier_needed_p (insn))
5416 fprintf (dump, "Emitting stop before label %d\n",
5417 INSN_UID (last_label));
5418 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5421 init_insn_group_barriers ();
5429 /* Like emit_insn_group_barriers, but used when no final scheduling pass
5430 has been run. This function has to emit all necessary group barriers. */
5433 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5437 init_insn_group_barriers ();
5439 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5441 if (GET_CODE (insn) == BARRIER)
5443 rtx last = prev_active_insn (insn);
5447 if (GET_CODE (last) == JUMP_INSN
5448 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5449 last = prev_active_insn (last);
5450 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5451 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5453 init_insn_group_barriers ();
5455 else if (INSN_P (insn))
5457 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5458 init_insn_group_barriers ();
5459 else if (group_barrier_needed_p (insn))
5461 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5462 init_insn_group_barriers ();
5463 group_barrier_needed_p (insn);
5470 static int errata_find_address_regs (rtx *, void *);
5471 static void errata_emit_nops (rtx);
5472 static void fixup_errata (void);
5474 /* This structure is used to track some details about the previous insn
5475 groups so we can determine if it may be necessary to insert NOPs to
5476 work around hardware errata. */
5479 HARD_REG_SET p_reg_set;
5480 HARD_REG_SET gr_reg_conditionally_set;
5483 /* Index into the last_group array. */
5484 static int group_idx;
5486 /* Called through for_each_rtx; determines if a hard register that was
5487 conditionally set in the previous group is used as an address register.
5488 It ensures that for_each_rtx returns 1 in that case. */
5490 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5493 if (GET_CODE (x) != MEM)
5496 if (GET_CODE (x) == POST_MODIFY)
5498 if (GET_CODE (x) == REG)
5500 struct group *prev_group = last_group + (group_idx ^ 1);
5501 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5509 /* Called for each insn; this function keeps track of the state in
5510 last_group and emits additional NOPs if necessary to work around
5511 an Itanium A/B step erratum. */
5513 errata_emit_nops (rtx insn)
5515 struct group *this_group = last_group + group_idx;
5516 struct group *prev_group = last_group + (group_idx ^ 1);
5517 rtx pat = PATTERN (insn);
5518 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5519 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5520 enum attr_type type;
5523 if (GET_CODE (real_pat) == USE
5524 || GET_CODE (real_pat) == CLOBBER
5525 || GET_CODE (real_pat) == ASM_INPUT
5526 || GET_CODE (real_pat) == ADDR_VEC
5527 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5528 || asm_noperands (PATTERN (insn)) >= 0)
5531 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate part of it. */
5534 if (GET_CODE (set) == PARALLEL)
5537 set = XVECEXP (real_pat, 0, 0);
5538 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5539 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5540 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5547 if (set && GET_CODE (set) != SET)
5550 type = get_attr_type (insn);
5553 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5554 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5556 if ((type == TYPE_M || type == TYPE_A) && cond && set
5557 && REG_P (SET_DEST (set))
5558 && GET_CODE (SET_SRC (set)) != PLUS
5559 && GET_CODE (SET_SRC (set)) != MINUS
5560 && (GET_CODE (SET_SRC (set)) != ASHIFT
5561 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5562 && (GET_CODE (SET_SRC (set)) != MEM
5563 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5564 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5566 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5567 || ! REG_P (XEXP (cond, 0)))
5570 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5571 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5573 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5575 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5576 emit_insn_before (gen_nop (), insn);
5577 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5579 memset (last_group, 0, sizeof last_group);
5583 /* Emit extra nops if they are required to work around hardware errata. */
5590 if (! TARGET_B_STEP)
5594 memset (last_group, 0, sizeof last_group);
5596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5601 if (ia64_safe_type (insn) == TYPE_S)
5604 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5607 errata_emit_nops (insn);
5612 /* Instruction scheduling support. */
5614 #define NR_BUNDLES 10
5616 /* A list of names of all available bundles. */
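/* Background: each 128-bit ia64 bundle holds three 41-bit instruction
   slots plus a 5-bit template that tells the dispersal hardware which
   unit (M, I, F, B or L/X) each slot feeds.  The ".mlx" template uses
   its last two slots for a single long L+X instruction such as movl,
   which is why an L-type insn is counted as two insns elsewhere in
   this file.  */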
5618 static const char *bundle_name [NR_BUNDLES] =
5624 #if NR_BUNDLES == 10
5634 /* Nonzero if we should insert stop bits into the schedule. */
5636 int ia64_final_schedule = 0;
5638 /* Codes of the corresponding queried units: */
5640 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5641 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5643 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5644 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5646 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5648 /* The following variable value is an insn group barrier. */
5650 static rtx dfa_stop_insn;
5652 /* The following variable value is the last issued insn. */
5654 static rtx last_scheduled_insn;
5656 /* The following variable value is size of the DFA state. */
5658 static size_t dfa_state_size;
5660 /* The following variable value is pointer to a DFA state used as
5661 temporary variable. */
5663 static state_t temp_dfa_state = NULL;
5665 /* The following variable value is DFA state after issuing the last
5668 static state_t prev_cycle_state = NULL;
5670 /* The following array element values are TRUE if the corresponding
5671 insn requires a stop bit to be added before it. */
5673 static char *stops_p;
5675 /* The following variable is used to set up the array mentioned above. */
5677 static int stop_before_p = 0;
5679 /* The following variable value is the length of the arrays `clocks' and `add_cycles'. */
5682 static int clocks_length;
5684 /* The following array element values are the cycles on which the
5685 corresponding insn will be issued. The array is used only for Itanium1. */
5690 /* The following array element values are the numbers of cycles that should
5691 be added to improve insn scheduling for MM_insns for Itanium1. */
5693 static int *add_cycles;
5695 static rtx ia64_single_set (rtx);
5696 static void ia64_emit_insn_before (rtx, rtx);
5698 /* Map a bundle number to its pseudo-op. */
5701 get_bundle_name (int b)
5703 return bundle_name[b];
5707 /* Return the maximum number of instructions a cpu can issue. */
5710 ia64_issue_rate (void)
5715 /* Helper function - like single_set, but look inside COND_EXEC. */
5718 ia64_single_set (rtx insn)
5720 rtx x = PATTERN (insn), ret;
5721 if (GET_CODE (x) == COND_EXEC)
5722 x = COND_EXEC_CODE (x);
5723 if (GET_CODE (x) == SET)
5726 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5727 Although they are not a classical single set, the second set is there just
5728 to protect it from moving past FP-relative stack accesses. */
5729 switch (recog_memoized (insn))
5731 case CODE_FOR_prologue_allocate_stack:
5732 case CODE_FOR_epilogue_deallocate_stack:
5733 ret = XVECEXP (x, 0, 0);
5737 ret = single_set_2 (insn, x);
5744 /* Adjust the cost of a scheduling dependency. Return the new cost of
5745 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5748 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5750 enum attr_itanium_class dep_class;
5751 enum attr_itanium_class insn_class;
5753 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5756 insn_class = ia64_safe_itanium_class (insn);
5757 dep_class = ia64_safe_itanium_class (dep_insn);
5758 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5759 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5765 /* Like emit_insn_before, but skip cycle_display notes.
5766 ??? When cycle display notes are implemented, update this. */
5769 ia64_emit_insn_before (rtx insn, rtx before)
5771 emit_insn_before (insn, before);
5774 /* The following function marks insns that produce addresses for load
5775 and store insns. Such insns will be placed into M slots because that
5776 decreases the latency time for Itanium1 (see function
5777 `ia64_produce_address_p' and the DFA descriptions). */
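/* A hypothetical bypass this hook looks for:

	add r14 = r15, r16	// IALU insn computing an address
	;;
	st8 [r14] = r17		// store consuming that address

   Marking the add steers it into an M slot, which shortens the
   address-to-memory-op latency on Itanium1.  */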
5780 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5782 rtx insn, link, next, next_tail;
5784 next_tail = NEXT_INSN (tail);
5785 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5788 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5790 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5792 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5794 next = XEXP (link, 0);
5795 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5796 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5797 && ia64_st_address_bypass_p (insn, next))
5799 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5800 || ia64_safe_itanium_class (next)
5801 == ITANIUM_CLASS_FLD)
5802 && ia64_ld_address_bypass_p (insn, next))
5805 insn->call = link != 0;
5809 /* We're beginning a new block. Initialize data structures as necessary. */
5812 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5813 int sched_verbose ATTRIBUTE_UNUSED,
5814 int max_ready ATTRIBUTE_UNUSED)
5816 #ifdef ENABLE_CHECKING
5819 if (reload_completed)
5820 for (insn = NEXT_INSN (current_sched_info->prev_head);
5821 insn != current_sched_info->next_tail;
5822 insn = NEXT_INSN (insn))
5823 if (SCHED_GROUP_P (insn))
5826 last_scheduled_insn = NULL_RTX;
5827 init_insn_group_barriers ();
5830 /* We are about to begin issuing insns for this clock cycle.
5831 Override the default sort algorithm to better slot instructions. */
5834 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5835 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5839 int n_ready = *pn_ready;
5840 rtx *e_ready = ready + n_ready;
5844 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5846 if (reorder_type == 0)
5848 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5850 for (insnp = ready; insnp < e_ready; insnp++)
5851 if (insnp < e_ready)
5854 enum attr_type t = ia64_safe_type (insn);
5855 if (t == TYPE_UNKNOWN)
5857 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5858 || asm_noperands (PATTERN (insn)) >= 0)
5860 rtx lowest = ready[n_asms];
5861 ready[n_asms] = insn;
5867 rtx highest = ready[n_ready - 1];
5868 ready[n_ready - 1] = insn;
5875 if (n_asms < n_ready)
5877 /* Some normal insns to process. Skip the asms. */
5881 else if (n_ready > 0)
5885 if (ia64_final_schedule)
5888 int nr_need_stop = 0;
5890 for (insnp = ready; insnp < e_ready; insnp++)
5891 if (safe_group_barrier_needed_p (*insnp))
5894 if (reorder_type == 1 && n_ready == nr_need_stop)
5896 if (reorder_type == 0)
5899 /* Move down everything that needs a stop bit, preserving relative order. */
5901 while (insnp-- > ready + deleted)
5902 while (insnp >= ready + deleted)
5905 if (! safe_group_barrier_needed_p (insn))
5907 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5918 /* We are about to begin issuing insns for this clock cycle. Override
5919 the default sort algorithm to better slot instructions. */
5922 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5925 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5926 pn_ready, clock_var, 0);
5929 /* Like ia64_sched_reorder, but called after issuing each insn.
5930 Override the default sort algorithm to better slot instructions. */
5933 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5934 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5935 int *pn_ready, int clock_var)
5937 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5938 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5939 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5943 /* We are about to issue INSN. Return the number of insns left on the
5944 ready queue that can be issued this cycle. */
5947 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5948 int sched_verbose ATTRIBUTE_UNUSED,
5949 rtx insn ATTRIBUTE_UNUSED,
5950 int can_issue_more ATTRIBUTE_UNUSED)
5952 last_scheduled_insn = insn;
5953 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5954 if (reload_completed)
5956 if (group_barrier_needed_p (insn))
5958 if (GET_CODE (insn) == CALL_INSN)
5959 init_insn_group_barriers ();
5960 stops_p [INSN_UID (insn)] = stop_before_p;
5966 /* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
5970 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
5972 if (insn == NULL_RTX || !INSN_P (insn))
5974 return (!reload_completed
5975 || !safe_group_barrier_needed_p (insn));
5978 /* The following variable value is a pseudo-insn used by the DFA insn
5979 scheduler to change the DFA state when the simulated clock is increased. */
5982 static rtx dfa_pre_cycle_insn;
5984 /* We are about to begin issuing INSN. Return nonzero if we cannot
5985 issue it on the given cycle CLOCK, and return zero if we should not sort
5986 the ready queue on the next clock start. */
5989 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
5990 int clock, int *sort_p)
5992 int setup_clocks_p = FALSE;
5994 if (insn == NULL_RTX || !INSN_P (insn))
5996 if ((reload_completed && safe_group_barrier_needed_p (insn))
5997 || (last_scheduled_insn
5998 && (GET_CODE (last_scheduled_insn) == CALL_INSN
5999 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6000 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6002 init_insn_group_barriers ();
6003 if (verbose && dump)
6004 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6005 last_clock == clock ? " + cycle advance" : "");
6007 if (last_clock == clock)
6009 state_transition (curr_state, dfa_stop_insn);
6010 if (TARGET_EARLY_STOP_BITS)
6011 *sort_p = (last_scheduled_insn == NULL_RTX
6012 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6017 else if (reload_completed)
6018 setup_clocks_p = TRUE;
6019 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6020 state_transition (curr_state, dfa_stop_insn);
6021 state_transition (curr_state, dfa_pre_cycle_insn);
6022 state_transition (curr_state, NULL);
6024 else if (reload_completed)
6025 setup_clocks_p = TRUE;
6026 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6028 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6030 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6035 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6036 if (REG_NOTE_KIND (link) == 0)
6038 enum attr_itanium_class dep_class;
6039 rtx dep_insn = XEXP (link, 0);
6041 dep_class = ia64_safe_itanium_class (dep_insn);
6042 if ((dep_class == ITANIUM_CLASS_MMMUL
6043 || dep_class == ITANIUM_CLASS_MMSHF)
6044 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6046 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6047 d = last_clock - clocks [INSN_UID (dep_insn)];
6050 add_cycles [INSN_UID (insn)] = 3 - d;
6058 /* The following page contains abstract data `bundle states' which are
6059 used for bundling insns (inserting nops and template generation). */
6061 /* The following describes state of insn bundling. */
6065 /* Unique bundle state number to identify them in the debugging output. */
6068 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6069 /* number of nops before and after the insn */
6070 short before_nops_num, after_nops_num;
6071 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st insn) */
6073 int cost; /* cost of the state in cycles */
6074 int accumulated_insns_num; /* number of all previous insns including
6075 nops. L is considered as 2 insns */
6076 int branch_deviation; /* deviation of previous branches from 3rd slots */
6077 struct bundle_state *next; /* next state with the same insn_num */
6078 struct bundle_state *originator; /* originator (previous insn state) */
6079 /* All bundle states are in the following chain. */
6080 struct bundle_state *allocated_states_chain;
6081 /* The DFA State after issuing the insn and the nops. */
6085 /* The following maps an insn number to the corresponding bundle state. */
6087 static struct bundle_state **index_to_bundle_states;
6089 /* The unique number of next bundle state. */
6091 static int bundle_states_num;
6093 /* All allocated bundle states are in the following chain. */
6095 static struct bundle_state *allocated_bundle_states_chain;
6097 /* All allocated but not used bundle states are in the following chain. */
6100 static struct bundle_state *free_bundle_state_chain;
6103 /* The following function returns a free bundle state. */
6105 static struct bundle_state *
6106 get_free_bundle_state (void)
6108 struct bundle_state *result;
6110 if (free_bundle_state_chain != NULL)
6112 result = free_bundle_state_chain;
6113 free_bundle_state_chain = result->next;
6117 result = xmalloc (sizeof (struct bundle_state));
6118 result->dfa_state = xmalloc (dfa_state_size);
6119 result->allocated_states_chain = allocated_bundle_states_chain;
6120 allocated_bundle_states_chain = result;
6122 result->unique_num = bundle_states_num++;
6127 /* The following function frees the given bundle state. */
6130 free_bundle_state (struct bundle_state *state)
6132 state->next = free_bundle_state_chain;
6133 free_bundle_state_chain = state;
6136 /* Start work with abstract data `bundle states'. */
6139 initiate_bundle_states (void)
6141 bundle_states_num = 0;
6142 free_bundle_state_chain = NULL;
6143 allocated_bundle_states_chain = NULL;
6146 /* Finish work with abstract data `bundle states'. */
6149 finish_bundle_states (void)
6151 struct bundle_state *curr_state, *next_state;
6153 for (curr_state = allocated_bundle_states_chain;
6155 curr_state = next_state)
6157 next_state = curr_state->allocated_states_chain;
6158 free (curr_state->dfa_state);
6163 /* Hash table of the bundle states. The key is dfa_state and insn_num
6164 of the bundle states. */
6166 static htab_t bundle_state_table;
6168 /* The function returns hash of BUNDLE_STATE. */
6171 bundle_state_hash (const void *bundle_state)
6173 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6176 for (result = i = 0; i < dfa_state_size; i++)
6177 result += (((unsigned char *) state->dfa_state) [i]
6178 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6179 return result + state->insn_num;
6182 /* The function returns nonzero if the bundle state keys are equal. */
6185 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6187 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6188 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6190 return (state1->insn_num == state2->insn_num
6191 && memcmp (state1->dfa_state, state2->dfa_state,
6192 dfa_state_size) == 0);
6195 /* The function inserts the BUNDLE_STATE into the hash table. The
6196 function returns nonzero if the bundle has been inserted into the
6197 table. The table contains the best bundle state with a given key. */
6200 insert_bundle_state (struct bundle_state *bundle_state)
6204 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6205 if (*entry_ptr == NULL)
6207 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6208 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6209 *entry_ptr = (void *) bundle_state;
6212 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6213 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6214 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6215 > bundle_state->accumulated_insns_num
6216 || (((struct bundle_state *)
6217 *entry_ptr)->accumulated_insns_num
6218 == bundle_state->accumulated_insns_num
6219 && ((struct bundle_state *)
6220 *entry_ptr)->branch_deviation
6221 > bundle_state->branch_deviation))))
6224 struct bundle_state temp;
6226 temp = *(struct bundle_state *) *entry_ptr;
6227 *(struct bundle_state *) *entry_ptr = *bundle_state;
6228 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6229 *bundle_state = temp;
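/* To summarize the comparison above: a new state replaces an existing
   one with the same (dfa_state, insn_num) key when it has a lower
   cost; ties are broken first by fewer accumulated insns (i.e. fewer
   nops), then by a smaller branch deviation.  */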
6234 /* Start work with the hash table. */
6237 initiate_bundle_state_table (void)
6239 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6243 /* Finish work with the hash table. */
6246 finish_bundle_state_table (void)
6248 htab_delete (bundle_state_table);
6253 /* The following variable is an insn `nop' used to check bundle states
6254 with different numbers of inserted nops. */
6256 static rtx ia64_nop;
6258 /* The following function tries to issue NOPS_NUM nops for the current
6259 state without advancing the processor cycle. If it fails, the
6260 function returns FALSE and frees the current state. */
6263 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6267 for (i = 0; i < nops_num; i++)
6268 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6270 free_bundle_state (curr_state);
6276 /* The following function tries to issue INSN for the current
6277 state without advancing the processor cycle. If it fails, the
6278 function returns FALSE and frees the current state. */
6281 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6283 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6285 free_bundle_state (curr_state);
6291 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6292 starting from ORIGINATOR without advancing the processor cycle. If
6293 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6294 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6295 If this was successful, the function creates a new bundle state and
6296 inserts it into the hash table and into `index_to_bundle_states'. */
6299 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6300 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6302 struct bundle_state *curr_state;
6304 curr_state = get_free_bundle_state ();
6305 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6306 curr_state->insn = insn;
6307 curr_state->insn_num = originator->insn_num + 1;
6308 curr_state->cost = originator->cost;
6309 curr_state->originator = originator;
6310 curr_state->before_nops_num = before_nops_num;
6311 curr_state->after_nops_num = 0;
6312 curr_state->accumulated_insns_num
6313 = originator->accumulated_insns_num + before_nops_num;
6314 curr_state->branch_deviation = originator->branch_deviation;
6315 if (insn == NULL_RTX)
6317 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6319 if (GET_MODE (insn) == TImode)
6321 if (!try_issue_nops (curr_state, before_nops_num))
6323 if (!try_issue_insn (curr_state, insn))
6325 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6326 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6327 && curr_state->accumulated_insns_num % 3 != 0)
6329 free_bundle_state (curr_state);
6333 else if (GET_MODE (insn) != TImode)
6335 if (!try_issue_nops (curr_state, before_nops_num))
6337 if (!try_issue_insn (curr_state, insn))
6339 curr_state->accumulated_insns_num++;
6340 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6341 || asm_noperands (PATTERN (insn)) >= 0)
6343 if (ia64_safe_type (insn) == TYPE_L)
6344 curr_state->accumulated_insns_num++;
6348 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6349 state_transition (curr_state->dfa_state, NULL);
6351 if (!try_issue_nops (curr_state, before_nops_num))
6353 if (!try_issue_insn (curr_state, insn))
6355 curr_state->accumulated_insns_num++;
6356 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6357 || asm_noperands (PATTERN (insn)) >= 0)
6359 /* Finish bundle containing asm insn. */
6360 curr_state->after_nops_num
6361 = 3 - curr_state->accumulated_insns_num % 3;
6362 curr_state->accumulated_insns_num
6363 += 3 - curr_state->accumulated_insns_num % 3;
6365 else if (ia64_safe_type (insn) == TYPE_L)
6366 curr_state->accumulated_insns_num++;
6368 if (ia64_safe_type (insn) == TYPE_B)
6369 curr_state->branch_deviation
6370 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
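	  /* Worked example: accumulated_insns_num already counts this
	     branch, so (accumulated_insns_num - 1) % 3 is its slot (0,
	     1 or 2) within its bundle.  A branch in the third slot
	     adds 0 to the deviation; one in the first slot adds 2.  */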
6371 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6373 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6376 struct bundle_state *curr_state1;
6377 struct bundle_state *allocated_states_chain;
6379 curr_state1 = get_free_bundle_state ();
6380 dfa_state = curr_state1->dfa_state;
6381 allocated_states_chain = curr_state1->allocated_states_chain;
6382 *curr_state1 = *curr_state;
6383 curr_state1->dfa_state = dfa_state;
6384 curr_state1->allocated_states_chain = allocated_states_chain;
6385 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6387 curr_state = curr_state1;
6389 if (!try_issue_nops (curr_state,
6390 3 - curr_state->accumulated_insns_num % 3))
6392 curr_state->after_nops_num
6393 = 3 - curr_state->accumulated_insns_num % 3;
6394 curr_state->accumulated_insns_num
6395 += 3 - curr_state->accumulated_insns_num % 3;
6397 if (!insert_bundle_state (curr_state))
6398 free_bundle_state (curr_state);
6402 /* The following function returns the position in the two-bundle window for the given STATE. */
6406 get_max_pos (state_t state)
6408 if (cpu_unit_reservation_p (state, pos_6))
6410 else if (cpu_unit_reservation_p (state, pos_5))
6412 else if (cpu_unit_reservation_p (state, pos_4))
6414 else if (cpu_unit_reservation_p (state, pos_3))
6416 else if (cpu_unit_reservation_p (state, pos_2))
6418 else if (cpu_unit_reservation_p (state, pos_1))
6424 /* The function returns the code of a possible template for the given
6425 position and state. The function should be called only with the two
6426 position values 3 or 6. */
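/* For instance, with POS == 3 a state whose first bundle reserved the
   _0mmi_ units yields the ".mmi" template.  The code returned is used
   as an index into bundle_name[] above (0 for ".mii" through 9 for
   ".mlx"); POS == 3 queries the first bundle of the two-bundle
   window, POS == 6 the second.  */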
6429 get_template (state_t state, int pos)
6434 if (cpu_unit_reservation_p (state, _0mii_))
6436 else if (cpu_unit_reservation_p (state, _0mmi_))
6438 else if (cpu_unit_reservation_p (state, _0mfi_))
6440 else if (cpu_unit_reservation_p (state, _0mmf_))
6442 else if (cpu_unit_reservation_p (state, _0bbb_))
6444 else if (cpu_unit_reservation_p (state, _0mbb_))
6446 else if (cpu_unit_reservation_p (state, _0mib_))
6448 else if (cpu_unit_reservation_p (state, _0mmb_))
6450 else if (cpu_unit_reservation_p (state, _0mfb_))
6452 else if (cpu_unit_reservation_p (state, _0mlx_))
6457 if (cpu_unit_reservation_p (state, _1mii_))
6459 else if (cpu_unit_reservation_p (state, _1mmi_))
6461 else if (cpu_unit_reservation_p (state, _1mfi_))
6463 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6465 else if (cpu_unit_reservation_p (state, _1bbb_))
6467 else if (cpu_unit_reservation_p (state, _1mbb_))
6469 else if (cpu_unit_reservation_p (state, _1mib_))
6471 else if (cpu_unit_reservation_p (state, _1mmb_))
6473 else if (cpu_unit_reservation_p (state, _1mfb_))
6475 else if (cpu_unit_reservation_p (state, _1mlx_))
6484 /* The following function returns the first insn important for insn
6485 bundling, searching from INSN up to (but not including) TAIL. */
6488 get_next_important_insn (rtx insn, rtx tail)
6490 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6492 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6493 && GET_CODE (PATTERN (insn)) != USE
6494 && GET_CODE (PATTERN (insn)) != CLOBBER)
6499 /* The following function does insn bundling.  The bundling algorithm is
6500 based on dynamic programming.  It tries to insert different numbers of
6501 nop insns before/after the real insns.  At the end of EBB, it chooses the
6502 best alternative and then, moving back in EBB, inserts templates for
6503 the best alternative.  The algorithm is directed by information
6504 (changes of simulated processor cycle) created by the 2nd insn
6505 scheduling pass.  */
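/* A reading aid for the code below (a sketch of the structure, not a
   change to the algorithm): the forward pass keeps, for each insn index,
   index_to_bundle_states[index] -- a chain of candidate bundle states.
   Every state is extended by issuing the next insn preceded by 0, 1 or
   2 nops (the three issue_nops_and_insn calls).  The backward pass
   starts from the cheapest final state and follows the originator
   links, materializing the chosen nops and bundle templates.  */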
6508 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6510 struct bundle_state *curr_state, *next_state, *best_state;
6511 rtx insn, next_insn;
6513 int i, bundle_end_p, only_bundle_end_p, asm_p;
6514 int pos = 0, max_pos, template0, template1;
6517 enum attr_type type;
6520 for (insn = NEXT_INSN (prev_head_insn);
6521 insn && insn != tail;
6522 insn = NEXT_INSN (insn))
6528 dfa_clean_insn_cache ();
6529 initiate_bundle_state_table ();
6530 index_to_bundle_states = xmalloc ((insn_num + 2)
6531 * sizeof (struct bundle_state *));
6532 /* First (forward) pass -- generates states. */
6533 curr_state = get_free_bundle_state ();
6534 curr_state->insn = NULL;
6535 curr_state->before_nops_num = 0;
6536 curr_state->after_nops_num = 0;
6537 curr_state->insn_num = 0;
6538 curr_state->cost = 0;
6539 curr_state->accumulated_insns_num = 0;
6540 curr_state->branch_deviation = 0;
6541 curr_state->next = NULL;
6542 curr_state->originator = NULL;
6543 state_reset (curr_state->dfa_state);
6544 index_to_bundle_states [0] = curr_state;
6546 for (insn = NEXT_INSN (prev_head_insn);
6548 insn = NEXT_INSN (insn))
6550 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6551 || GET_CODE (PATTERN (insn)) == USE
6552 || GET_CODE (PATTERN (insn)) == CLOBBER)
6553 && GET_MODE (insn) == TImode)
6555 PUT_MODE (insn, VOIDmode);
6556 for (next_insn = NEXT_INSN (insn);
6558 next_insn = NEXT_INSN (next_insn))
6559 if (INSN_P (next_insn)
6560 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6561 && GET_CODE (PATTERN (next_insn)) != USE
6562 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6564 PUT_MODE (next_insn, TImode);
6568 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6573 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6574 || GET_CODE (PATTERN (insn)) == USE
6575 || GET_CODE (PATTERN (insn)) == CLOBBER)
6577 type = ia64_safe_type (insn);
6578 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6580 index_to_bundle_states [insn_num] = NULL;
6581 for (curr_state = index_to_bundle_states [insn_num - 1];
6583 curr_state = next_state)
6585 pos = curr_state->accumulated_insns_num % 3;
6586 next_state = curr_state->next;
6587 /* Finish the current bundle in order to start a subsequent
6588 asm insn in a new bundle. */
6590 = (next_insn != NULL_RTX
6591 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6592 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6594 = (only_bundle_end_p || next_insn == NULL_RTX
6595 || (GET_MODE (next_insn) == TImode
6596 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6597 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6599 /* We need to insert 2 Nops for cases like M_MII. */
6600 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6601 && !bundle_end_p && pos == 1))
6602 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6604 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6606 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6609 if (index_to_bundle_states [insn_num] == NULL)
6611 for (curr_state = index_to_bundle_states [insn_num];
6613 curr_state = curr_state->next)
6614 if (verbose >= 2 && dump)
6618 unsigned short one_automaton_state;
6619 unsigned short oneb_automaton_state;
6620 unsigned short two_automaton_state;
6621 unsigned short twob_automaton_state;
6626 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6627 curr_state->unique_num,
6628 (curr_state->originator == NULL
6629 ? -1 : curr_state->originator->unique_num),
6631 curr_state->before_nops_num, curr_state->after_nops_num,
6632 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6633 (ia64_tune == PROCESSOR_ITANIUM
6634 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6635 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6639 if (index_to_bundle_states [insn_num] == NULL)
6641 /* Finding state with a minimal cost: */
6643 for (curr_state = index_to_bundle_states [insn_num];
6645 curr_state = curr_state->next)
6646 if (curr_state->accumulated_insns_num % 3 == 0
6647 && (best_state == NULL || best_state->cost > curr_state->cost
6648 || (best_state->cost == curr_state->cost
6649 && (curr_state->accumulated_insns_num
6650 < best_state->accumulated_insns_num
6651 || (curr_state->accumulated_insns_num
6652 == best_state->accumulated_insns_num
6653 && curr_state->branch_deviation
6654 < best_state->branch_deviation)))))
6655 best_state = curr_state;
6656 /* Second (backward) pass: adding nops and templates: */
6657 insn_num = best_state->before_nops_num;
6658 template0 = template1 = -1;
6659 for (curr_state = best_state;
6660 curr_state->originator != NULL;
6661 curr_state = curr_state->originator)
6663 insn = curr_state->insn;
6664 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6665 || asm_noperands (PATTERN (insn)) >= 0);
6667 if (verbose >= 2 && dump)
6671 unsigned short one_automaton_state;
6672 unsigned short oneb_automaton_state;
6673 unsigned short two_automaton_state;
6674 unsigned short twob_automaton_state;
6679 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6680 curr_state->unique_num,
6681 (curr_state->originator == NULL
6682 ? -1 : curr_state->originator->unique_num),
6684 curr_state->before_nops_num, curr_state->after_nops_num,
6685 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6686 (ia64_tune == PROCESSOR_ITANIUM
6687 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6688 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6691 max_pos = get_max_pos (curr_state->dfa_state);
6692 if (max_pos == 6 || (max_pos == 3 && template0 < 0))
6696 template0 = get_template (curr_state->dfa_state, 3);
6699 template1 = get_template (curr_state->dfa_state, 3);
6700 template0 = get_template (curr_state->dfa_state, 6);
6703 if (max_pos > 3 && template1 < 0)
6707 template1 = get_template (curr_state->dfa_state, 3);
6711 for (i = 0; i < curr_state->after_nops_num; i++)
6714 emit_insn_after (nop, insn);
6722 b = gen_bundle_selector (GEN_INT (template0));
6723 ia64_emit_insn_before (b, nop);
6724 template0 = template1;
6728 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6729 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6730 && asm_noperands (PATTERN (insn)) < 0)
6732 if (ia64_safe_type (insn) == TYPE_L)
6737 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6738 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6739 && asm_noperands (PATTERN (insn)) < 0)
6743 b = gen_bundle_selector (GEN_INT (template0));
6744 ia64_emit_insn_before (b, insn);
6745 b = PREV_INSN (insn);
6747 template0 = template1;
6750 for (i = 0; i < curr_state->before_nops_num; i++)
6753 ia64_emit_insn_before (nop, insn);
6754 nop = PREV_INSN (insn);
6763 b = gen_bundle_selector (GEN_INT (template0));
6764 ia64_emit_insn_before (b, insn);
6765 b = PREV_INSN (insn);
6767 template0 = template1;
6772 if (ia64_tune == PROCESSOR_ITANIUM)
6773 /* Insert additional cycles for MM-insns: */
6774 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6779 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6780 || GET_CODE (PATTERN (insn)) == USE
6781 || GET_CODE (PATTERN (insn)) == CLOBBER)
6783 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6784 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6790 last = prev_active_insn (insn);
6791 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6793 last = prev_active_insn (last);
6795 for (;; last = prev_active_insn (last))
6796 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6798 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6801 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
6804 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6806 if ((pred_stop_p && n == 0) || n > 2
6807 || (template0 == 9 && n != 0))
6809 for (j = 3 - n; j > 0; j --)
6810 ia64_emit_insn_before (gen_nop (), insn);
6811 add_cycles [INSN_UID (insn)]--;
6812 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6813 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6816 add_cycles [INSN_UID (insn)]--;
6817 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6819 /* Insert .MII bundle. */
6820 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
6822 ia64_emit_insn_before (gen_nop (), insn);
6823 ia64_emit_insn_before (gen_nop (), insn);
6826 ia64_emit_insn_before
6827 (gen_insn_group_barrier (GEN_INT (3)), insn);
6830 ia64_emit_insn_before (gen_nop (), insn);
6831 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6834 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6836 for (j = n; j > 0; j --)
6837 ia64_emit_insn_before (gen_nop (), insn);
6839 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6843 free (index_to_bundle_states);
6844 finish_bundle_state_table ();
6846 dfa_clean_insn_cache ();
6849 /* The following function is called at the end of scheduling BB or
6850 EBB. After reload, it inserts stop bits and does insn bundling. */
6853 ia64_sched_finish (FILE *dump, int sched_verbose)
6856 fprintf (dump, "// Finishing schedule.\n");
6857 if (!reload_completed)
6859 if (reload_completed)
6861 final_emit_insn_group_barriers (dump);
6862 bundling (dump, sched_verbose, current_sched_info->prev_head,
6863 current_sched_info->next_tail);
6864 if (sched_verbose && dump)
6865 fprintf (dump, "// finishing %d-%d\n",
6866 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6867 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6873 /* The following function inserts stop bits in scheduled BB or EBB. */
6876 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6879 int need_barrier_p = 0;
6880 rtx prev_insn = NULL_RTX;
6882 init_insn_group_barriers ();
6884 for (insn = NEXT_INSN (current_sched_info->prev_head);
6885 insn != current_sched_info->next_tail;
6886 insn = NEXT_INSN (insn))
6888 if (GET_CODE (insn) == BARRIER)
6890 rtx last = prev_active_insn (insn);
6894 if (GET_CODE (last) == JUMP_INSN
6895 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6896 last = prev_active_insn (last);
6897 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6898 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6900 init_insn_group_barriers ();
6902 prev_insn = NULL_RTX;
6904 else if (INSN_P (insn))
6906 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6908 init_insn_group_barriers ();
6910 prev_insn = NULL_RTX;
6912 else if (need_barrier_p || group_barrier_needed_p (insn))
6914 if (TARGET_EARLY_STOP_BITS)
6919 last != current_sched_info->prev_head;
6920 last = PREV_INSN (last))
6921 if (INSN_P (last) && GET_MODE (last) == TImode
6922 && stops_p [INSN_UID (last)])
6924 if (last == current_sched_info->prev_head)
6926 last = prev_active_insn (last);
6928 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6929 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6931 init_insn_group_barriers ();
6932 for (last = NEXT_INSN (last);
6934 last = NEXT_INSN (last))
6936 group_barrier_needed_p (last);
6940 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6942 init_insn_group_barriers ();
6944 group_barrier_needed_p (insn);
6945 prev_insn = NULL_RTX;
6947 else if (recog_memoized (insn) >= 0)
6949 need_barrier_p = (GET_CODE (insn) == CALL_INSN
6950 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6951 || asm_noperands (PATTERN (insn)) >= 0);
6958 /* If the following function returns TRUE, we will use the DFA
6959 insn scheduler.  */
6962 ia64_use_dfa_pipeline_interface (void)
6967 /* The following function returns the lookahead depth used by the DFA
6968 insn scheduler for multipass scheduling of the first cycle.  */
6971 ia64_first_cycle_multipass_dfa_lookahead (void)
6973 return (reload_completed ? 6 : 4);
6976 /* The following function initializes the variable `dfa_pre_cycle_insn'.  */
6979 ia64_init_dfa_pre_cycle_insn (void)
6981 if (temp_dfa_state == NULL)
6983 dfa_state_size = state_size ();
6984 temp_dfa_state = xmalloc (dfa_state_size);
6985 prev_cycle_state = xmalloc (dfa_state_size);
6987 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
6988 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
6989 recog_memoized (dfa_pre_cycle_insn);
6990 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6991 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
6992 recog_memoized (dfa_stop_insn);
6995 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6996 used by the DFA insn scheduler. */
6999 ia64_dfa_pre_cycle_insn (void)
7001 return dfa_pre_cycle_insn;
7004 /* The following function returns TRUE if PRODUCER (of type ilog or
7005 ld) produces address for CONSUMER (of type st or stf). */
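/* An illustrative pair (register numbers are arbitrary):

     ld8 r14 = [r15] ;;      // PRODUCER: computes the address
     st8 [r14] = r16         // CONSUMER: stores through it

   When this predicate holds, the scheduler can apply the dedicated
   address-bypass latency from the pipeline description instead of the
   generic load latency.  */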
7008 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7012 if (producer == NULL_RTX || consumer == NULL_RTX)
7014 dest = ia64_single_set (producer);
7015 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7016 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7018 if (GET_CODE (reg) == SUBREG)
7019 reg = SUBREG_REG (reg);
7020 dest = ia64_single_set (consumer);
7021 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7022 || GET_CODE (mem) != MEM)
7024 return reg_mentioned_p (reg, mem);
7027 /* The following function returns TRUE if PRODUCER (of type ilog or
7028 ld) produces address for CONSUMER (of type ld or fld). */
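/* The analogous load case:

     ld8 r14 = [r15] ;;      // PRODUCER: computes the address
     ld8 r16 = [r14]         // CONSUMER: loads through it

   The stripping of UNSPEC/SUBREG/ZERO_EXTEND below, together with the
   LO_SUM note, lets this also match GOT loads.  */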
7031 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7033 rtx dest, src, reg, mem;
7035 if (producer == NULL_RTX || consumer == NULL_RTX)
7037 dest = ia64_single_set (producer);
7038 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7039 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7041 if (GET_CODE (reg) == SUBREG)
7042 reg = SUBREG_REG (reg);
7043 src = ia64_single_set (consumer);
7044 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7046 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7047 mem = XVECEXP (mem, 0, 0);
7048 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7049 mem = XEXP (mem, 0);
7051 /* Note that LO_SUM is used for GOT loads. */
7052 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7055 return reg_mentioned_p (reg, mem);
7058 /* The following function returns TRUE if INSN produces an address for a
7059 load/store insn.  We place such insns into the M slot because that
7060 decreases their latency.  */
7063 ia64_produce_address_p (rtx insn)
7069 /* Emit pseudo-ops for the assembler to describe predicate relations.
7070 At present this assumes that we only consider predicate pairs to
7071 be mutex, and that the assembler can deduce proper values from
7072 straight-line code. */
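/* The emitted pseudo-op looks something like (the assembler spelling is
   an assumption here; the insn actually emitted is gen_pred_rel_mutex):

     .pred.rel "mutex", p6, p7

   which tells the assembler that the two predicates are never true at
   the same time.  */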
7075 emit_predicate_relation_info (void)
7079 FOR_EACH_BB_REVERSE (bb)
7082 rtx head = bb->head;
7084 /* We only need such notes at code labels. */
7085 if (GET_CODE (head) != CODE_LABEL)
7087 if (GET_CODE (NEXT_INSN (head)) == NOTE
7088 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7089 head = NEXT_INSN (head);
7091 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7092 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7094 rtx p = gen_rtx_REG (BImode, r);
7095 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7096 if (head == bb->end)
7102 /* Look for conditional calls that do not return, and protect predicate
7103 relations around them. Otherwise the assembler will assume the call
7104 returns, and complain about uses of call-clobbered predicates after
7105 the call.  */
7106 FOR_EACH_BB_REVERSE (bb)
7108 rtx insn = bb->head;
7112 if (GET_CODE (insn) == CALL_INSN
7113 && GET_CODE (PATTERN (insn)) == COND_EXEC
7114 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7116 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7117 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7118 if (bb->head == insn)
7120 if (bb->end == insn)
7124 if (insn == bb->end)
7126 insn = NEXT_INSN (insn);
7131 /* Perform machine dependent operations on the rtl chain INSNS. */
7136 /* We are freeing block_for_insn in the toplev to keep compatibility
7137 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7138 compute_bb_for_insn ();
7140 /* If optimizing, we'll have split before scheduling. */
7142 split_all_insns (0);
7144 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7145 non-optimizing bootstrap. */
7146 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7148 if (ia64_flag_schedule_insns2)
7150 timevar_push (TV_SCHED2);
7151 ia64_final_schedule = 1;
7153 initiate_bundle_states ();
7154 ia64_nop = make_insn_raw (gen_nop ());
7155 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7156 recog_memoized (ia64_nop);
7157 clocks_length = get_max_uid () + 1;
7158 stops_p = xcalloc (1, clocks_length);
7159 if (ia64_tune == PROCESSOR_ITANIUM)
7161 clocks = xcalloc (clocks_length, sizeof (int));
7162 add_cycles = xcalloc (clocks_length, sizeof (int));
7164 if (ia64_tune == PROCESSOR_ITANIUM2)
7166 pos_1 = get_cpu_unit_code ("2_1");
7167 pos_2 = get_cpu_unit_code ("2_2");
7168 pos_3 = get_cpu_unit_code ("2_3");
7169 pos_4 = get_cpu_unit_code ("2_4");
7170 pos_5 = get_cpu_unit_code ("2_5");
7171 pos_6 = get_cpu_unit_code ("2_6");
7172 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7173 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7174 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7175 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7176 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7177 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7178 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7179 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7180 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7181 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7182 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7183 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7184 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7185 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7186 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7187 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7188 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7189 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7190 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7191 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7195 pos_1 = get_cpu_unit_code ("1_1");
7196 pos_2 = get_cpu_unit_code ("1_2");
7197 pos_3 = get_cpu_unit_code ("1_3");
7198 pos_4 = get_cpu_unit_code ("1_4");
7199 pos_5 = get_cpu_unit_code ("1_5");
7200 pos_6 = get_cpu_unit_code ("1_6");
7201 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7202 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7203 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7204 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7205 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7206 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7207 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7208 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7209 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7210 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7211 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7212 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7213 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7214 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7215 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7216 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7217 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7218 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7219 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7220 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7222 schedule_ebbs (rtl_dump_file);
7223 finish_bundle_states ();
7224 if (ia64_tune == PROCESSOR_ITANIUM)
7230 emit_insn_group_barriers (rtl_dump_file);
7232 ia64_final_schedule = 0;
7233 timevar_pop (TV_SCHED2);
7236 emit_all_insn_group_barriers (rtl_dump_file);
7238 /* A call must not be the last instruction in a function, so that the
7239 return address is still within the function, so that unwinding works
7240 properly. Note that IA-64 differs from dwarf2 on this point. */
7241 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7246 insn = get_last_insn ();
7247 if (! INSN_P (insn))
7248 insn = prev_active_insn (insn);
7249 if (GET_CODE (insn) == INSN
7250 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7251 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7254 insn = prev_active_insn (insn);
7256 if (GET_CODE (insn) == CALL_INSN)
7259 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7260 emit_insn (gen_break_f ());
7261 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7266 emit_predicate_relation_info ();
7269 /* Return true if REGNO is used by the epilogue. */
7272 ia64_epilogue_uses (int regno)
7277 /* With a call to a function in another module, we will write a new
7278 value to "gp". After returning from such a call, we need to make
7279 sure the function restores the original gp-value, even if the
7280 function itself does not use the gp anymore. */
7281 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7283 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7284 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7285 /* For functions defined with the syscall_linkage attribute, all
7286 input registers are marked as live at all function exits. This
7287 prevents the register allocator from using the input registers,
7288 which in turn makes it possible to restart a system call after
7289 an interrupt without having to save/restore the input registers.
7290 This also prevents kernel data from leaking to application code. */
7291 return lookup_attribute ("syscall_linkage",
7292 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7295 /* Conditional return patterns can't represent the use of `b0' as
7296 the return address, so we force the value live this way. */
7300 /* Likewise for ar.pfs, which is used by br.ret. */
7308 /* Return true if REGNO is used by the frame unwinder. */
7311 ia64_eh_uses (int regno)
7313 if (! reload_completed)
7316 if (current_frame_info.reg_save_b0
7317 && regno == current_frame_info.reg_save_b0)
7319 if (current_frame_info.reg_save_pr
7320 && regno == current_frame_info.reg_save_pr)
7322 if (current_frame_info.reg_save_ar_pfs
7323 && regno == current_frame_info.reg_save_ar_pfs)
7325 if (current_frame_info.reg_save_ar_unat
7326 && regno == current_frame_info.reg_save_ar_unat)
7328 if (current_frame_info.reg_save_ar_lc
7329 && regno == current_frame_info.reg_save_ar_lc)
7335 /* Return true if this goes in small data/bss. */
7337 /* ??? We could also support our own long data here, generating movl/add/ld8
7338 instead of addl,ld8/ld8.  This makes the code bigger, but should make the
7339 code faster because there is one less load.  This would also cover incomplete
7340 types, which can't go in sdata/sbss.  */
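/* For reference, the two addressing sequences contrasted above look
   roughly like this (the syntax is illustrative):

     addl r14 = @gprel(x), gp ;;   // small data: addl + one load
     ld8  r15 = [r14]

     addl r14 = @ltoff(x), gp ;;   // otherwise: addl, ld8 (address),
     ld8  r14 = [r14] ;;           // then ld8 for the data itself
     ld8  r15 = [r14]
*/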
7343 ia64_in_small_data_p (tree exp)
7345 if (TARGET_NO_SDATA)
7348 /* We want to merge strings, so we never consider them small data. */
7349 if (TREE_CODE (exp) == STRING_CST)
7352 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7354 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7355 if (strcmp (section, ".sdata") == 0
7356 || strcmp (section, ".sbss") == 0)
7361 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7363 /* If this is an incomplete type with size 0, then we can't put it
7364 in sdata because it might be too big when completed. */
7365 if (size > 0 && size <= ia64_section_threshold)
7372 /* Output assembly directives for prologue regions. */
7374 /* The current basic block number. */
7376 static bool last_block;
7378 /* True if we need a copy_state command at the start of the next block. */
7380 static bool need_copy_state;
7382 /* The function emits unwind directives for the start of an epilogue. */
7385 process_epilogue (void)
7387 /* If this isn't the last block of the function, then we need to label the
7388 current state, and copy it back in at the start of the next block. */
7392 fprintf (asm_out_file, "\t.label_state 1\n");
7393 need_copy_state = true;
7396 fprintf (asm_out_file, "\t.restore sp\n");
7399 /* This function processes a SET pattern looking for specific patterns
7400 which result in emitting an assembly directive required for unwinding. */
7403 process_set (FILE *asm_out_file, rtx pat)
7405 rtx src = SET_SRC (pat);
7406 rtx dest = SET_DEST (pat);
7407 int src_regno, dest_regno;
7409 /* Look for the ALLOC insn. */
7410 if (GET_CODE (src) == UNSPEC_VOLATILE
7411 && XINT (src, 1) == UNSPECV_ALLOC
7412 && GET_CODE (dest) == REG)
7414 dest_regno = REGNO (dest);
7416 /* If this isn't the final destination for ar.pfs, the alloc
7417 shouldn't have been marked frame related. */
7418 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7421 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7422 ia64_dbx_register_number (dest_regno));
7426 /* Look for SP = .... */
7427 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7429 if (GET_CODE (src) == PLUS)
7431 rtx op0 = XEXP (src, 0);
7432 rtx op1 = XEXP (src, 1);
7433 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7435 if (INTVAL (op1) < 0)
7436 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7439 process_epilogue ();
7444 else if (GET_CODE (src) == REG
7445 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7446 process_epilogue ();
7453 /* Register move we need to look at. */
7454 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7456 src_regno = REGNO (src);
7457 dest_regno = REGNO (dest);
7462 /* Saving return address pointer. */
7463 if (dest_regno != current_frame_info.reg_save_b0)
7465 fprintf (asm_out_file, "\t.save rp, r%d\n",
7466 ia64_dbx_register_number (dest_regno));
7470 if (dest_regno != current_frame_info.reg_save_pr)
7472 fprintf (asm_out_file, "\t.save pr, r%d\n",
7473 ia64_dbx_register_number (dest_regno));
7476 case AR_UNAT_REGNUM:
7477 if (dest_regno != current_frame_info.reg_save_ar_unat)
7479 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7480 ia64_dbx_register_number (dest_regno));
7484 if (dest_regno != current_frame_info.reg_save_ar_lc)
7486 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7487 ia64_dbx_register_number (dest_regno));
7490 case STACK_POINTER_REGNUM:
7491 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7492 || ! frame_pointer_needed)
7494 fprintf (asm_out_file, "\t.vframe r%d\n",
7495 ia64_dbx_register_number (dest_regno));
7499 /* Everything else should indicate being stored to memory. */
7504 /* Memory store we need to look at. */
7505 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7511 if (GET_CODE (XEXP (dest, 0)) == REG)
7513 base = XEXP (dest, 0);
7516 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7517 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7519 base = XEXP (XEXP (dest, 0), 0);
7520 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7525 if (base == hard_frame_pointer_rtx)
7527 saveop = ".savepsp";
7530 else if (base == stack_pointer_rtx)
7535 src_regno = REGNO (src);
7539 if (current_frame_info.reg_save_b0 != 0)
7541 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7545 if (current_frame_info.reg_save_pr != 0)
7547 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7551 if (current_frame_info.reg_save_ar_lc != 0)
7553 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7557 if (current_frame_info.reg_save_ar_pfs != 0)
7559 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7562 case AR_UNAT_REGNUM:
7563 if (current_frame_info.reg_save_ar_unat != 0)
7565 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7572 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7573 1 << (src_regno - GR_REG (4)));
7581 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7582 1 << (src_regno - BR_REG (1)));
7589 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7590 1 << (src_regno - FR_REG (2)));
7593 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7594 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7595 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7596 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7597 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7598 1 << (src_regno - FR_REG (12)));
7610 /* This function looks at a single insn and emits any directives
7611 required to unwind this insn. */
7613 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7615 if (flag_unwind_tables
7616 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7620 if (GET_CODE (insn) == NOTE
7621 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7623 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7625 /* Restore unwind state from immediately before the epilogue. */
7626 if (need_copy_state)
7628 fprintf (asm_out_file, "\t.body\n");
7629 fprintf (asm_out_file, "\t.copy_state 1\n");
7630 need_copy_state = false;
7634 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7637 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7639 pat = XEXP (pat, 0);
7641 pat = PATTERN (insn);
7643 switch (GET_CODE (pat))
7646 process_set (asm_out_file, pat);
7652 int limit = XVECLEN (pat, 0);
7653 for (par_index = 0; par_index < limit; par_index++)
7655 rtx x = XVECEXP (pat, 0, par_index);
7656 if (GET_CODE (x) == SET)
7657 process_set (asm_out_file, x);
7670 ia64_init_builtins (void)
7672 tree psi_type_node = build_pointer_type (integer_type_node);
7673 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7675 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7676 tree si_ftype_psi_si_si
7677 = build_function_type_list (integer_type_node,
7678 psi_type_node, integer_type_node,
7679 integer_type_node, NULL_TREE);
7681 /* __sync_val_compare_and_swap_di */
7682 tree di_ftype_pdi_di_di
7683 = build_function_type_list (long_integer_type_node,
7684 pdi_type_node, long_integer_type_node,
7685 long_integer_type_node, NULL_TREE);
7686 /* __sync_bool_compare_and_swap_di */
7687 tree si_ftype_pdi_di_di
7688 = build_function_type_list (integer_type_node,
7689 pdi_type_node, long_integer_type_node,
7690 long_integer_type_node, NULL_TREE);
7691 /* __sync_synchronize */
7692 tree void_ftype_void
7693 = build_function_type (void_type_node, void_list_node);
7695 /* __sync_lock_test_and_set_si */
7696 tree si_ftype_psi_si
7697 = build_function_type_list (integer_type_node,
7698 psi_type_node, integer_type_node, NULL_TREE);
7700 /* __sync_lock_test_and_set_di */
7701 tree di_ftype_pdi_di
7702 = build_function_type_list (long_integer_type_node,
7703 pdi_type_node, long_integer_type_node,
7706 /* __sync_lock_release_si */
7708 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7710 /* __sync_lock_release_di */
7712 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7717 /* The __fpreg type. */
7718 fpreg_type = make_node (REAL_TYPE);
7719 /* ??? Once the IA64 back end supports both 80-bit and 128-bit
7720 floating types, this type should have XFmode, not TFmode.
7721 TYPE_PRECISION should be 80 bits, not 128. And, the back end
7722 should know to load/save __fpreg variables using the ldf.fill and
7723 stf.spill instructions. */
7724 TYPE_PRECISION (fpreg_type) = 128;
7725 layout_type (fpreg_type);
7726 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7728 /* The __float80 type. */
7729 float80_type = make_node (REAL_TYPE);
7730 /* ??? Once the IA64 back end supports both 80-bit and 128-bit
7731 floating types, this type should have XFmode, not TFmode.
7732 TYPE_PRECISION should be 80 bits, not 128. */
7733 TYPE_PRECISION (float80_type) = 128;
7734 layout_type (float80_type);
7735 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7737 /* The __float128 type. */
7738 if (INTEL_EXTENDED_IEEE_FORMAT)
7740 tree float128_type = make_node (REAL_TYPE);
7741 TYPE_PRECISION (float128_type) = 128;
7742 layout_type (float128_type);
7743 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7746 /* This is a synonym for "long double". */
7747 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7750 #define def_builtin(name, type, code) \
7751 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7753 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7754 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7755 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7756 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7757 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7758 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7759 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7760 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7762 def_builtin ("__sync_synchronize", void_ftype_void,
7763 IA64_BUILTIN_SYNCHRONIZE);
7765 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7766 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7767 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7768 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7769 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7770 IA64_BUILTIN_LOCK_RELEASE_SI);
7771 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7772 IA64_BUILTIN_LOCK_RELEASE_DI);
7774 def_builtin ("__builtin_ia64_bsp",
7775 build_function_type (ptr_type_node, void_list_node),
7778 def_builtin ("__builtin_ia64_flushrs",
7779 build_function_type (void_type_node, void_list_node),
7780 IA64_BUILTIN_FLUSHRS);
7782 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7783 IA64_BUILTIN_FETCH_AND_ADD_SI);
7784 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7785 IA64_BUILTIN_FETCH_AND_SUB_SI);
7786 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7787 IA64_BUILTIN_FETCH_AND_OR_SI);
7788 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7789 IA64_BUILTIN_FETCH_AND_AND_SI);
7790 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7791 IA64_BUILTIN_FETCH_AND_XOR_SI);
7792 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7793 IA64_BUILTIN_FETCH_AND_NAND_SI);
7795 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7796 IA64_BUILTIN_ADD_AND_FETCH_SI);
7797 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7798 IA64_BUILTIN_SUB_AND_FETCH_SI);
7799 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7800 IA64_BUILTIN_OR_AND_FETCH_SI);
7801 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7802 IA64_BUILTIN_AND_AND_FETCH_SI);
7803 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7804 IA64_BUILTIN_XOR_AND_FETCH_SI);
7805 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7806 IA64_BUILTIN_NAND_AND_FETCH_SI);
7808 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7809 IA64_BUILTIN_FETCH_AND_ADD_DI);
7810 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7811 IA64_BUILTIN_FETCH_AND_SUB_DI);
7812 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7813 IA64_BUILTIN_FETCH_AND_OR_DI);
7814 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7815 IA64_BUILTIN_FETCH_AND_AND_DI);
7816 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7817 IA64_BUILTIN_FETCH_AND_XOR_DI);
7818 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7819 IA64_BUILTIN_FETCH_AND_NAND_DI);
7821 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7822 IA64_BUILTIN_ADD_AND_FETCH_DI);
7823 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7824 IA64_BUILTIN_SUB_AND_FETCH_DI);
7825 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7826 IA64_BUILTIN_OR_AND_FETCH_DI);
7827 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7828 IA64_BUILTIN_AND_AND_FETCH_DI);
7829 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7830 IA64_BUILTIN_XOR_AND_FETCH_DI);
7831 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7832 IA64_BUILTIN_NAND_AND_FETCH_DI);
7837 /* Expand fetch_and_op intrinsics.  The basic code sequence is:
7838
7839      mf
7840      tmp = [ptr];
7841      do {
7842        ret = tmp;
7843        ar.ccv = tmp;
7844        tmp <op>= value;
7845        cmpxchgsz.acq tmp = [ptr], tmp
7846      } while (tmp != ret)
7847 */
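/* A hypothetical use of one of the builtins registered above:

     int counter;
     int old = __sync_fetch_and_add_si (&counter, 1);

   Since 1 is a valid fetchadd immediate, this particular case is
   handled by the fetchadd special case below rather than by the
   cmpxchg loop.  */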
7850 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
7851 tree arglist, rtx target)
7853 rtx ret, label, tmp, ccv, insn, mem, value;
7856 arg0 = TREE_VALUE (arglist);
7857 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7858 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7859 #ifdef POINTERS_EXTEND_UNSIGNED
7860 if (GET_MODE(mem) != Pmode)
7861 mem = convert_memory_address (Pmode, mem);
7863 value = expand_expr (arg1, NULL_RTX, mode, 0);
7865 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7866 MEM_VOLATILE_P (mem) = 1;
7868 if (target && register_operand (target, mode))
7871 ret = gen_reg_rtx (mode);
7873 emit_insn (gen_mf ());
7875 /* Special case for fetchadd instructions. */
7876 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7879 insn = gen_fetchadd_acq_si (ret, mem, value);
7881 insn = gen_fetchadd_acq_di (ret, mem, value);
7886 tmp = gen_reg_rtx (mode);
7887 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7888 emit_move_insn (tmp, mem);
7890 label = gen_label_rtx ();
7892 emit_move_insn (ret, tmp);
7893 emit_move_insn (ccv, tmp);
7895 /* Perform the specific operation.  NAND is special-cased: it arrives
7896 here as one_cmpl_optab and is expanded as ~tmp & value.  */
7897 if (binoptab == one_cmpl_optab)
7899 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7900 binoptab = and_optab;
7902 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7905 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7907 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7910 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7915 /* Expand op_and_fetch intrinsics.  The basic code sequence is:
7916
7917      mf
7918      tmp = [ptr];
7919      do {
7920        old = tmp;
7921        ar.ccv = tmp;
7922        ret = tmp <op> value;
7923        cmpxchgsz.acq tmp = [ptr], ret
7924      } while (tmp != old)
7925 */
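/* E.g. `__sync_add_and_fetch_si (&counter, 4)' -- the same loop as in
   fetch_and_op above, except that the value handed back is the result
   of the operation rather than the old memory contents.  */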
7928 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
7929 tree arglist, rtx target)
7931 rtx old, label, tmp, ret, ccv, insn, mem, value;
7934 arg0 = TREE_VALUE (arglist);
7935 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7936 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7937 #ifdef POINTERS_EXTEND_UNSIGNED
7938 if (GET_MODE(mem) != Pmode)
7939 mem = convert_memory_address (Pmode, mem);
7942 value = expand_expr (arg1, NULL_RTX, mode, 0);
7944 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7945 MEM_VOLATILE_P (mem) = 1;
7947 if (target && ! register_operand (target, mode))
7950 emit_insn (gen_mf ());
7951 tmp = gen_reg_rtx (mode);
7952 old = gen_reg_rtx (mode);
7953 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7955 emit_move_insn (tmp, mem);
7957 label = gen_label_rtx ();
7959 emit_move_insn (old, tmp);
7960 emit_move_insn (ccv, tmp);
7962 /* Perform the specific operation.  NAND is special-cased: it arrives
7963 here as one_cmpl_optab and is expanded as ~tmp & value.  */
7964 if (binoptab == one_cmpl_optab)
7966 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7967 binoptab = and_optab;
7969 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7972 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7974 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7977 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7982 /* Expand val_ and bool_compare_and_swap.  For val_ we want:
7983
7984      ar.ccv = oldval
7985      mf
7986      cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7987      return ret
7988
7989    For bool_ it's the same except return ret == oldval.
7990 */
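/* A hypothetical caller:

     static int lock;
     int acquired = __sync_bool_compare_and_swap_si (&lock, 0, 1);

   `acquired' is nonzero iff the word was atomically changed from 0 to
   1; the val_ form returns the old value instead of that flag.  */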
7993 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
7994 int boolp, tree arglist, rtx target)
7996 tree arg0, arg1, arg2;
7997 rtx mem, old, new, ccv, tmp, insn;
7999 arg0 = TREE_VALUE (arglist);
8000 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8001 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8002 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8003 old = expand_expr (arg1, NULL_RTX, mode, 0);
8004 new = expand_expr (arg2, NULL_RTX, mode, 0);
8006 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8007 MEM_VOLATILE_P (mem) = 1;
8009 if (! register_operand (old, mode))
8010 old = copy_to_mode_reg (mode, old);
8011 if (! register_operand (new, mode))
8012 new = copy_to_mode_reg (mode, new);
8014 if (! boolp && target && register_operand (target, mode))
8017 tmp = gen_reg_rtx (mode);
8019 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8021 emit_move_insn (ccv, old);
8024 rtx ccvtmp = gen_reg_rtx (DImode);
8025 emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8026 emit_move_insn (ccv, ccvtmp);
8028 emit_insn (gen_mf ());
8030 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8032 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8038 target = gen_reg_rtx (rmode);
8039 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8045 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
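/* E.g. a spin-lock acquire built on this builtin (a sketch):

     static int lock;
     while (__sync_lock_test_and_set_si (&lock, 1) != 0)
       continue;

   The xchg insn generated below has acquire semantics, so no extra
   fence is needed on the lock path.  */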
8048 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8052 rtx mem, new, ret, insn;
8054 arg0 = TREE_VALUE (arglist);
8055 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8056 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8057 new = expand_expr (arg1, NULL_RTX, mode, 0);
8059 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8060 MEM_VOLATILE_P (mem) = 1;
8061 if (! register_operand (new, mode))
8062 new = copy_to_mode_reg (mode, new);
8064 if (target && register_operand (target, mode))
8067 ret = gen_reg_rtx (mode);
8070 insn = gen_xchgsi (ret, mem, new);
8072 insn = gen_xchgdi (ret, mem, new);
8078 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
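/* The matching release for the sketch above is simply
   `__sync_lock_release_si (&lock)': as the comment says, a plain store
   of zero, made volatile below so that it is emitted with release
   semantics (st4.rel).  */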
8081 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8082 rtx target ATTRIBUTE_UNUSED)
8087 arg0 = TREE_VALUE (arglist);
8088 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8090 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8091 MEM_VOLATILE_P (mem) = 1;
8093 emit_move_insn (mem, const0_rtx);
8099 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8100 enum machine_mode mode ATTRIBUTE_UNUSED,
8101 int ignore ATTRIBUTE_UNUSED)
8103 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8104 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8105 tree arglist = TREE_OPERAND (exp, 1);
8106 enum machine_mode rmode = VOIDmode;
8110 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8111 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8116 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8117 case IA64_BUILTIN_LOCK_RELEASE_SI:
8118 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8119 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8120 case IA64_BUILTIN_FETCH_AND_OR_SI:
8121 case IA64_BUILTIN_FETCH_AND_AND_SI:
8122 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8123 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8124 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8125 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8126 case IA64_BUILTIN_OR_AND_FETCH_SI:
8127 case IA64_BUILTIN_AND_AND_FETCH_SI:
8128 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8129 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8133 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8138 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8143 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8144 case IA64_BUILTIN_LOCK_RELEASE_DI:
8145 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8146 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8147 case IA64_BUILTIN_FETCH_AND_OR_DI:
8148 case IA64_BUILTIN_FETCH_AND_AND_DI:
8149 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8150 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8151 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8152 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8153 case IA64_BUILTIN_OR_AND_FETCH_DI:
8154 case IA64_BUILTIN_AND_AND_FETCH_DI:
8155 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8156 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8166 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8167 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8168 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8171 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8172 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8173 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8176 case IA64_BUILTIN_SYNCHRONIZE:
8177 emit_insn (gen_mf ());
8180 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8181 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8182 return ia64_expand_lock_test_and_set (mode, arglist, target);
8184 case IA64_BUILTIN_LOCK_RELEASE_SI:
8185 case IA64_BUILTIN_LOCK_RELEASE_DI:
8186 return ia64_expand_lock_release (mode, arglist, target);
8188 case IA64_BUILTIN_BSP:
8189 if (! target || ! register_operand (target, DImode))
8190 target = gen_reg_rtx (DImode);
8191 emit_insn (gen_bsp_value (target));
8192 #ifdef POINTERS_EXTEND_UNSIGNED
8193 target = convert_memory_address (ptr_mode, target);
8197 case IA64_BUILTIN_FLUSHRS:
8198 emit_insn (gen_flushrs ());
8201 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8202 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8203 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8205 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8206 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8207 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8209 case IA64_BUILTIN_FETCH_AND_OR_SI:
8210 case IA64_BUILTIN_FETCH_AND_OR_DI:
8211 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8213 case IA64_BUILTIN_FETCH_AND_AND_SI:
8214 case IA64_BUILTIN_FETCH_AND_AND_DI:
8215 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8217 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8218 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8219 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8221 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8222 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8223 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8225 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8226 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8227 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8229 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8230 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8231 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8233 case IA64_BUILTIN_OR_AND_FETCH_SI:
8234 case IA64_BUILTIN_OR_AND_FETCH_DI:
8235 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8237 case IA64_BUILTIN_AND_AND_FETCH_SI:
8238 case IA64_BUILTIN_AND_AND_FETCH_DI:
8239 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8241 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8242 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8243 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8245 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8246 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8247 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8256 /* On HP-UX IA64, aggregate parameters are passed in the
8257 most significant bits of the stack slot.  */
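/* E.g. a 4-byte struct passed in an 8-byte slot occupies the high
   4 bytes of that slot, so small aggregates are padded upward here
   (an illustration of the rule above, not additional ABI detail).  */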
8260 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8262 /* Exception to normal case for structures/unions/etc. */
8264 if (type && AGGREGATE_TYPE_P (type)
8265 && int_size_in_bytes (type) < UNITS_PER_WORD)
8268 /* Fall back to the default. */
8269 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8272 /* Linked list of all external functions that are to be emitted by GCC.
8273 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8274 order to avoid putting out names that are never really used. */
8276 struct extern_func_list
8278 struct extern_func_list *next; /* next external */
8279 char *name; /* name of the external */
8280 } *extern_func_head = 0;
8283 ia64_hpux_add_extern_decl (const char *name)
8285 struct extern_func_list *p;
8287 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8288 p->name = xmalloc (strlen (name) + 1);
8289 strcpy(p->name, name);
8290 p->next = extern_func_head;
8291 extern_func_head = p;
8294 /* Print out the list of used global functions. */
8297 ia64_hpux_file_end (void)
8299 while (extern_func_head)
8301 const char *real_name;
8304 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8305 decl = maybe_get_identifier (real_name);
8308 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8311 TREE_ASM_WRITTEN (decl) = 1;
8312 (*targetm.asm_out.globalize_label) (asm_out_file,
8313 extern_func_head->name);
8314 fputs (TYPE_ASM_OP, asm_out_file);
8315 assemble_name (asm_out_file, extern_func_head->name);
8316 putc (',', asm_out_file);
8317 fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
8318 putc ('\n', asm_out_file);
8320 extern_func_head = extern_func_head->next;
8324 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8327 ia64_hpux_init_libfuncs (void)
8329 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8330 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8331 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8332 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8333 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8334 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8335 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8336 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8338 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8339 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8340 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8341 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8342 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8343 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8345 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8346 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8347 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8348 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8350 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8351 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8352 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8353 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8355 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8356 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8359 /* Rename the division and modulus functions in VMS. */
8362 ia64_vms_init_libfuncs (void)
8364 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8365 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8366 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8367 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8368 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8369 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8370 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8371 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8374 /* Switch to the section to which we should output X. The only thing
8375 special we do here is to honor small data. */
8378 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8379 unsigned HOST_WIDE_INT align)
8381 if (GET_MODE_SIZE (mode) > 0
8382 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8385 default_elf_select_rtx_section (mode, x, align);
8388 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8389 Pretend flag_pic is always set. */
8392 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8394 default_elf_select_section_1 (exp, reloc, align, true);
8398 ia64_rwreloc_unique_section (tree decl, int reloc)
8400 default_unique_section_1 (decl, reloc, true);
8404 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8405 unsigned HOST_WIDE_INT align)
8407 int save_pic = flag_pic;
8409 ia64_select_rtx_section (mode, x, align);
8410 flag_pic = save_pic;
8414 ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8416 return default_section_type_flags_1 (decl, name, reloc, true);
8420 /* Output the assembler code for a thunk function. THUNK_DECL is the
8421 declaration for the thunk function itself, FUNCTION is the decl for
8422 the target function. DELTA is an immediate constant offset to be
8423 added to THIS. If VCALL_OFFSET is nonzero, the word at
8424 *(*this + vcall_offset) should be added to THIS. */
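/* In C terms, the thunk computes (a sketch of the semantics described
   above, not of the rtl actually emitted):

     this += delta;
     if (vcall_offset)
       this += *(long *)(*(char **)this + vcall_offset);
     return function (this, ...);

   with the call generated as a sibling (tail) call.  */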
8427 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8428 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8431 rtx this, insn, funexp;
8433 reload_completed = 1;
8434 epilogue_completed = 1;
8437 /* Set things up as ia64_expand_prologue might. */
8438 last_scratch_gr_reg = 15;
8440 memset (&current_frame_info, 0, sizeof (current_frame_info));
8441 current_frame_info.spill_cfa_off = -16;
8442 current_frame_info.n_input_regs = 1;
8443 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8445 if (!TARGET_REG_NAMES)
8446 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8448 /* Mark the end of the (empty) prologue. */
8449 emit_note (NOTE_INSN_PROLOGUE_END);
8451 this = gen_rtx_REG (Pmode, IN_REG (0));
8454 rtx tmp = gen_rtx_REG (ptr_mode, IN_REG (0));
8455 REG_POINTER (tmp) = 1;
8456 if (delta && CONST_OK_FOR_I (delta))
8458 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8462 emit_insn (gen_ptr_extend (this, tmp));
8465 /* Apply the constant offset, if required. */
8468 rtx delta_rtx = GEN_INT (delta);
8470 if (!CONST_OK_FOR_I (delta))
8472 rtx tmp = gen_rtx_REG (Pmode, 2);
8473 emit_move_insn (tmp, delta_rtx);
8476 emit_insn (gen_adddi3 (this, this, delta_rtx));
8479 /* Apply the offset from the vtable, if required. */
8482 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8483 rtx tmp = gen_rtx_REG (Pmode, 2);
8487 rtx t = gen_rtx_REG (ptr_mode, 2);
8488 REG_POINTER (t) = 1;
8489 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8490 if (CONST_OK_FOR_I (vcall_offset))
8492 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8497 emit_insn (gen_ptr_extend (tmp, t));
8500 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8504 if (!CONST_OK_FOR_J (vcall_offset))
8506 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8507 emit_move_insn (tmp2, vcall_offset_rtx);
8508 vcall_offset_rtx = tmp2;
8510 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8514 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8515 gen_rtx_MEM (ptr_mode, tmp));
8517 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8519 emit_insn (gen_adddi3 (this, this, tmp));
8522 /* Generate a tail call to the target function. */
8523 if (! TREE_USED (function))
8525 assemble_external (function);
8526 TREE_USED (function) = 1;
8528 funexp = XEXP (DECL_RTL (function), 0);
8529 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8530 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8531 insn = get_last_insn ();
8532 SIBLING_CALL_P (insn) = 1;
8534 /* Code generation for calls relies on splitting. */
8535 reload_completed = 1;
8536 epilogue_completed = 1;
8537 try_split (PATTERN (insn), insn, 0);
8541 /* Run just enough of rest_of_compilation to get the insns emitted.
8542 There's not really enough bulk here to make other passes such as
8543 instruction scheduling worthwhile.  Note that use_thunk calls
8544 assemble_start_function and assemble_end_function. */
8546 insn_locators_initialize ();
8547 emit_all_insn_group_barriers (NULL);
8548 insn = get_insns ();
8549 shorten_branches (insn);
8550 final_start_function (insn, file, 1);
8551 final (insn, file, 1, 0);
8552 final_end_function ();
8554 reload_completed = 0;
8555 epilogue_completed = 0;
8559 #include "gt-ia64.h"