/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "dwarf2out.h"
#include "opts.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};
/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);
static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, enum machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, enum machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       enum machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, enum machine_mode,
				       const_tree, bool);
static unsigned int ia64_function_arg_boundary (enum machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (enum machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (enum machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static void ia64_asm_unwind_emit (FILE *, rtx);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
static bool ia64_legitimate_address_p (enum machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
						     enum machine_mode,
						     int *,
						     const_tree,
						     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);

static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static enum machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object", 1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id", 1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* Since the common_object attribute sets DECL_SECTION_NAME, check
     that before looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	       size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}
/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
				tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;

    case POST_DEC:
      return 0;

    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;

    default:
      abort ();
    }
  return 1;
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
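
/* Illustrative note, not from the original source: for the combination
   (x << 3) & 0x7f8, ROP is 0x7f8 and RSHIFT is 3.  Shifting gives
   op = 0xff, and exact_log2 (0x100) returns 8, i.e. an 8-bit deposit
   field.  A mask that is not contiguous after the shift, e.g.
   0x7d8 >> 3 = 0xfb, yields exact_log2 (0xfc) == -1, signalling
   failure.  */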
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}
static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
	}
    }

  return true;
}
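
/* A note on the 14-bit offset split above (illustrative, not from the
   original source): lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000 sign-extends
   the low 14 bits of HI, so lo lands in [-0x2000, 0x1fff] and fits a
   14-bit "adds" immediate, while hi - lo is what the HIGH/LO_SUM pair
   carries.  E.g. for hi = 0x6789 the low 14 bits are 0x2789, which
   sign-extend to -0x1877, leaving hi = 0x8000; indeed
   0x8000 + (-0x1877) == 0x6789.  */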
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
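
/* Worked example of the plain-REG case above (illustrative, not from
   the original source): a load from (mem:TI (reg r)) becomes roughly

       ld8 out0 = [r], 8     ;; POST_INC: r advances by 8
       ld8 out1 = [r], -8    ;; POST_DEC: r steps back, restoring it

   and when the pointer register dies with the second access (DEAD),
   the restoring POST_DEC is omitted and the second word is simply
   addressed at offset 8.  */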
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16, 0);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = GEN_INT (0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, op0, op1)));

	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
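
/* The GTU tricks above, spelled out (illustrative, not from the original
   source): for V2SI, x >u y iff (x - 0x80000000) >s (y - 0x80000000),
   since subtracting the bias just flips the sign bit of each element.
   For V8QI/V4HI, the unsigned saturating subtraction x -us y is nonzero
   exactly when x >u y, so the code tests (x -us y) == 0 and flips
   NEGATE to recover the original sense.  */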
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
	{
	  emit_move_insn (operands[0], ot);
	  return;
	}

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}
1970 /* Emit an integral vector min or max operation. Return true if all done. */
1973 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1978 /* These four combinations are supported directly. */
1979 if (mode == V8QImode && (code == UMIN || code == UMAX))
1981 if (mode == V4HImode && (code == SMIN || code == SMAX))
1984 /* This combination can be implemented with only saturating subtraction. */
1985 if (mode == V4HImode && code == UMAX)
1987 rtx x, tmp = gen_reg_rtx (mode);
1989 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1990 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1992 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1996 /* Everything else is implemented via vector comparisons.  */
1997 xops[0] = operands[0];
1998 xops[4] = xops[1] = operands[1];
1999 xops[5] = xops[2] = operands[2];
2018 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2020 ia64_expand_vecint_cmov (xops);
2024 /* The vectors LO and HI each contain N halves of a double-wide vector.
2025 Reassemble either the first N/2 or the second N/2 elements. */
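/* Concretely, assuming little-endian element ordering: for V8QI inputs
   lo = {l0 ... l7} and hi = {h0 ... h7}, the low interleave yields
   {l0, h0, l1, h1, l2, h2, l3, h3} and the high interleave yields
   {l4, h4, l5, h5, l6, h6, l7, h7}; big-endian swaps the operand
   roles, as the TARGET_BIG_ENDIAN test below shows.  */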
2028 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2030 enum machine_mode mode = GET_MODE (lo);
2031 rtx (*gen) (rtx, rtx, rtx);
2037 gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
2040 gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
2046 x = gen_lowpart (mode, out);
2047 if (TARGET_BIG_ENDIAN)
2048 x = gen (x, hi, lo);
2050 x = gen (x, lo, hi);
2054 /* Return a vector of the sign-extension of VEC. */
2057 ia64_unpack_sign (rtx vec, bool unsignedp)
2059 enum machine_mode mode = GET_MODE (vec);
2060 rtx zero = CONST0_RTX (mode);
2066 rtx sign = gen_reg_rtx (mode);
2069 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2076 /* Emit an integral vector unpack operation. */
2079 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2081 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2082 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2085 /* Emit an integral vector widening sum operation.  */
2088 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2090 enum machine_mode wmode;
2093 sign = ia64_unpack_sign (operands[1], unsignedp);
2095 wmode = GET_MODE (operands[0]);
2096 l = gen_reg_rtx (wmode);
2097 h = gen_reg_rtx (wmode);
2099 ia64_unpack_assemble (l, operands[1], sign, false);
2100 ia64_unpack_assemble (h, operands[1], sign, true);
2102 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2103 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2104 if (t != operands[0])
2105 emit_move_insn (operands[0], t);
2108 /* Emit a signed or unsigned V8QI dot product operation. */
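/* Sketch of the decomposition used below: both inputs are widened to
   two V4HI halves, pmpy2 forms the eight 32-bit byte products as four
   V2SI vectors p1 ... p4, and everything is then summed lane-wise, so
   each 32-bit result lane accumulates four byte products plus the
   corresponding lane of the V2SI addend.  */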
2111 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2113 rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2114 rtx p1, p2, p3, p4, s1, s2, s3;
2118 sn1 = ia64_unpack_sign (op1, unsignedp);
2119 sn2 = ia64_unpack_sign (op2, unsignedp);
2121 l1 = gen_reg_rtx (V4HImode);
2122 l2 = gen_reg_rtx (V4HImode);
2123 h1 = gen_reg_rtx (V4HImode);
2124 h2 = gen_reg_rtx (V4HImode);
2125 ia64_unpack_assemble (l1, op1, sn1, false);
2126 ia64_unpack_assemble (l2, op2, sn2, false);
2127 ia64_unpack_assemble (h1, op1, sn1, true);
2128 ia64_unpack_assemble (h2, op2, sn2, true);
2130 p1 = gen_reg_rtx (V2SImode);
2131 p2 = gen_reg_rtx (V2SImode);
2132 p3 = gen_reg_rtx (V2SImode);
2133 p4 = gen_reg_rtx (V2SImode);
2134 emit_insn (gen_pmpy2_even (p1, l1, l2));
2135 emit_insn (gen_pmpy2_even (p2, h1, h2));
2136 emit_insn (gen_pmpy2_odd (p3, l1, l2));
2137 emit_insn (gen_pmpy2_odd (p4, h1, h2));
2139 s1 = gen_reg_rtx (V2SImode);
2140 s2 = gen_reg_rtx (V2SImode);
2141 s3 = gen_reg_rtx (V2SImode);
2142 emit_insn (gen_addv2si3 (s1, p1, p2));
2143 emit_insn (gen_addv2si3 (s2, p3, p4));
2144 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2145 emit_insn (gen_addv2si3 (operands[0], s2, s3));
2148 /* Emit the appropriate sequence for a call. */
2151 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2156 addr = XEXP (addr, 0);
2157 addr = convert_memory_address (DImode, addr);
2158 b0 = gen_rtx_REG (DImode, R_BR (0));
2160 /* ??? Should do this for functions known to bind local too. */
2161 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2164 insn = gen_sibcall_nogp (addr);
2166 insn = gen_call_nogp (addr, b0);
2168 insn = gen_call_value_nogp (retval, addr, b0);
2169 insn = emit_call_insn (insn);
2174 insn = gen_sibcall_gp (addr);
2176 insn = gen_call_gp (addr, b0);
2178 insn = gen_call_value_gp (retval, addr, b0);
2179 insn = emit_call_insn (insn);
2181 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2185 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2187 if (TARGET_ABI_OPEN_VMS)
2188 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2189 gen_rtx_REG (DImode, GR_REG (25)));
2193 reg_emitted (enum ia64_frame_regs r)
2195 if (emitted_frame_related_regs[r] == 0)
2196 emitted_frame_related_regs[r] = current_frame_info.r[r];
2198 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2202 get_reg (enum ia64_frame_regs r)
2205 return current_frame_info.r[r];
2209 is_emitted (int regno)
2213 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2214 if (emitted_frame_related_regs[r] == regno)
2220 ia64_reload_gp (void)
2224 if (current_frame_info.r[reg_save_gp])
2226 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2230 HOST_WIDE_INT offset;
2233 offset = (current_frame_info.spill_cfa_off
2234 + current_frame_info.spill_size);
2235 if (frame_pointer_needed)
2237 tmp = hard_frame_pointer_rtx;
2242 tmp = stack_pointer_rtx;
2243 offset = current_frame_info.total_size - offset;
2246 offset_r = GEN_INT (offset);
2247 if (satisfies_constraint_I (offset_r))
2248 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2251 emit_move_insn (pic_offset_table_rtx, offset_r);
2252 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2253 pic_offset_table_rtx, tmp));
2256 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2259 emit_move_insn (pic_offset_table_rtx, tmp);
2263 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2264 rtx scratch_b, int noreturn_p, int sibcall_p)
2267 bool is_desc = false;
2269 /* If we find we're calling through a register, then we're actually
2270 calling through a descriptor, so load up the values. */
2271 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2276 /* ??? We are currently constrained to *not* use peep2, because
2277 we can legitimately change the global lifetime of the GP
2278 (in the form of killing where previously live). This is
2279 because a call through a descriptor doesn't use the previous
2280 value of the GP, while a direct call does, and we do not
2281 commit to either form until the split here.
2283 That said, this means that we lack precise life info for
2284 whether ADDR is dead after this call. This is not terribly
2285 important, since we can fix things up essentially for free
2286 with the POST_DEC below, but it's nice to not use it when we
2287 can immediately tell it's not necessary. */
2288 addr_dead_p = ((noreturn_p || sibcall_p
2289 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2291 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
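/* An ia64 function descriptor is a pair of 8-byte words: the entry
   address followed by the callee's gp value.  The POST_INC/POST_DEC
   below walk ADDR across those two words.  */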
2293 /* Load the code address into scratch_b. */
2294 tmp = gen_rtx_POST_INC (Pmode, addr);
2295 tmp = gen_rtx_MEM (Pmode, tmp);
2296 emit_move_insn (scratch_r, tmp);
2297 emit_move_insn (scratch_b, scratch_r);
2299 /* Load the GP address. If ADDR is not dead here, then we must
2300 revert the change made above via the POST_INCREMENT. */
2302 tmp = gen_rtx_POST_DEC (Pmode, addr);
2305 tmp = gen_rtx_MEM (Pmode, tmp);
2306 emit_move_insn (pic_offset_table_rtx, tmp);
2313 insn = gen_sibcall_nogp (addr);
2315 insn = gen_call_value_nogp (retval, addr, retaddr);
2317 insn = gen_call_nogp (addr, retaddr);
2318 emit_call_insn (insn);
2320 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2324 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2326 This differs from the generic code in that we know about the zero-extending
2327 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2328 also know that ld.acq+cmpxchg.rel equals a full barrier.
2330 The loop we want to generate looks like
2332 cmp_reg = mem;
2333 label:
2334 old_reg = cmp_reg;
2335 new_reg = cmp_reg op val;
2336 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2337 if (cmp_reg != old_reg)
2338 goto label;
2340 Note that we only do the plain load from memory once. Subsequent
2341 iterations use the value loaded by the compare-and-swap pattern. */
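/* As a rough illustration, for a DImode PLUS under MEMMODEL_SEQ_CST
   this amounts to (register names here are only placeholders):

	ld8.acq      r_cmp = [r_mem]
      loop:
	mov          r_old = r_cmp
	mov          ar.ccv = r_cmp
	add          r_new = r_cmp, r_val
	cmpxchg8.rel r_cmp = [r_mem], r_new, ar.ccv
	cmp.ne       p6, p0 = r_cmp, r_old
   (p6)	br.cond.spnt loop  */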
2344 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2345 rtx old_dst, rtx new_dst, enum memmodel model)
2347 enum machine_mode mode = GET_MODE (mem);
2348 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2349 enum insn_code icode;
2351 /* Special case for using fetchadd. */
2352 if ((mode == SImode || mode == DImode)
2353 && (code == PLUS || code == MINUS)
2354 && fetchadd_operand (val, mode))
2357 val = GEN_INT (-INTVAL (val));
2360 old_dst = gen_reg_rtx (mode);
2364 case MEMMODEL_ACQ_REL:
2365 case MEMMODEL_SEQ_CST:
2366 emit_insn (gen_memory_barrier ());
2368 case MEMMODEL_RELAXED:
2369 case MEMMODEL_ACQUIRE:
2370 case MEMMODEL_CONSUME:
2372 icode = CODE_FOR_fetchadd_acq_si;
2374 icode = CODE_FOR_fetchadd_acq_di;
2376 case MEMMODEL_RELEASE:
2378 icode = CODE_FOR_fetchadd_rel_si;
2380 icode = CODE_FOR_fetchadd_rel_di;
2387 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2391 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2393 if (new_reg != new_dst)
2394 emit_move_insn (new_dst, new_reg);
2399 /* Because of the volatile mem read, we get an ld.acq, which is the
2400 front half of the full barrier. The end half is the cmpxchg.rel.
2401 For relaxed and release memory models, we don't need this. But we
2402 also don't bother trying to prevent it either. */
2403 gcc_assert (model == MEMMODEL_RELAXED
2404 || model == MEMMODEL_RELEASE
2405 || MEM_VOLATILE_P (mem));
2407 old_reg = gen_reg_rtx (DImode);
2408 cmp_reg = gen_reg_rtx (DImode);
2409 label = gen_label_rtx ();
2413 val = simplify_gen_subreg (DImode, val, mode, 0);
2414 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2417 emit_move_insn (cmp_reg, mem);
2421 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2422 emit_move_insn (old_reg, cmp_reg);
2423 emit_move_insn (ar_ccv, cmp_reg);
2426 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2431 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2432 true, OPTAB_DIRECT);
2433 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2436 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2437 true, OPTAB_DIRECT);
2440 new_reg = gen_lowpart (mode, new_reg);
2442 emit_move_insn (new_dst, new_reg);
2446 case MEMMODEL_RELAXED:
2447 case MEMMODEL_ACQUIRE:
2448 case MEMMODEL_CONSUME:
2451 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2452 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2453 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2454 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2460 case MEMMODEL_RELEASE:
2461 case MEMMODEL_ACQ_REL:
2462 case MEMMODEL_SEQ_CST:
2465 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2466 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2467 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2468 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2478 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2480 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2483 /* Begin the assembly file. */
2486 ia64_file_start (void)
2488 default_file_start ();
2489 emit_safe_across_calls ();
2493 emit_safe_across_calls (void)
2495 unsigned int rs, re;
2502 while (rs < 64 && call_used_regs[PR_REG (rs)])
2506 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2510 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2514 fputc (',', asm_out_file);
2516 fprintf (asm_out_file, "p%u", rs);
2518 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2522 fputc ('\n', asm_out_file);
2525 /* Globalize a declaration. */
2528 ia64_globalize_decl_name (FILE * stream, tree decl)
2530 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2531 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2534 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2535 const char *p = TREE_STRING_POINTER (v);
2536 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2538 targetm.asm_out.globalize_label (stream, name);
2539 if (TREE_CODE (decl) == FUNCTION_DECL)
2540 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2543 /* Helper function for ia64_compute_frame_size: find an appropriate general
2544 register to spill some special register to. SPECIAL_SPILL_MASK contains
2545 bits in GR0 to GR31 that have already been allocated by this routine.
2546 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2549 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2553 if (emitted_frame_related_regs[r] != 0)
2555 regno = emitted_frame_related_regs[r];
2556 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2557 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2558 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2559 else if (current_function_is_leaf
2560 && regno >= GR_REG (1) && regno <= GR_REG (31))
2561 current_frame_info.gr_used_mask |= 1 << regno;
2566 /* If this is a leaf function, first try an otherwise unused
2567 call-clobbered register. */
2568 if (current_function_is_leaf)
2570 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2571 if (! df_regs_ever_live_p (regno)
2572 && call_used_regs[regno]
2573 && ! fixed_regs[regno]
2574 && ! global_regs[regno]
2575 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2576 && ! is_emitted (regno))
2578 current_frame_info.gr_used_mask |= 1 << regno;
2585 regno = current_frame_info.n_local_regs;
2586 /* If there is a frame pointer, then we can't use loc79, because
2587 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2588 reg_name switching code in ia64_expand_prologue. */
2589 while (regno < (80 - frame_pointer_needed))
2590 if (! is_emitted (LOC_REG (regno++)))
2592 current_frame_info.n_local_regs = regno;
2593 return LOC_REG (regno - 1);
2597 /* Failed to find a general register to spill to. Must use stack. */
2601 /* In order to make for nice schedules, we try to allocate every temporary
2602 to a different register. We must of course stay away from call-saved,
2603 fixed, and global registers. We must also stay away from registers
2604 allocated in current_frame_info.gr_used_mask, since those include regs
2605 used all through the prologue.
2607 Any register allocated here must be used immediately. The idea is to
2608 aid scheduling, not to solve data flow problems. */
2610 static int last_scratch_gr_reg;
2613 next_scratch_gr_reg (void)
2617 for (i = 0; i < 32; ++i)
2619 regno = (last_scratch_gr_reg + i + 1) & 31;
2620 if (call_used_regs[regno]
2621 && ! fixed_regs[regno]
2622 && ! global_regs[regno]
2623 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2625 last_scratch_gr_reg = regno;
2630 /* There must be _something_ available. */
2634 /* Helper function for ia64_compute_frame_size, called through
2635 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2638 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2640 unsigned int regno = REGNO (reg);
2643 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2644 for (i = 0; i < n; ++i)
2645 current_frame_info.gr_used_mask |= 1 << (regno + i);
2650 /* Returns the number of bytes offset between the frame pointer and the stack
2651 pointer for the current function. SIZE is the number of bytes of space
2652 needed for local variables. */
2655 ia64_compute_frame_size (HOST_WIDE_INT size)
2657 HOST_WIDE_INT total_size;
2658 HOST_WIDE_INT spill_size = 0;
2659 HOST_WIDE_INT extra_spill_size = 0;
2660 HOST_WIDE_INT pretend_args_size;
2663 int spilled_gr_p = 0;
2664 int spilled_fr_p = 0;
2670 if (current_frame_info.initialized)
2673 memset (&current_frame_info, 0, sizeof current_frame_info);
2674 CLEAR_HARD_REG_SET (mask);
2676 /* Don't allocate scratches to the return register. */
2677 diddle_return_value (mark_reg_gr_used_mask, NULL);
2679 /* Don't allocate scratches to the EH scratch registers. */
2680 if (cfun->machine->ia64_eh_epilogue_sp)
2681 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2682 if (cfun->machine->ia64_eh_epilogue_bsp)
2683 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2685 /* Find the size of the register stack frame. We have only 80 local
2686 registers, because we reserve 8 for the inputs and 8 for the
2687 outputs.  */
2689 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2690 since we'll be adjusting that down later. */
2691 regno = LOC_REG (78) + ! frame_pointer_needed;
2692 for (; regno >= LOC_REG (0); regno--)
2693 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2695 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2697 /* For functions marked with the syscall_linkage attribute, we must mark
2698 all eight input registers as in use, so that locals aren't visible to
2699 the caller.  */
2701 if (cfun->machine->n_varargs > 0
2702 || lookup_attribute ("syscall_linkage",
2703 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2704 current_frame_info.n_input_regs = 8;
2707 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2708 if (df_regs_ever_live_p (regno))
2710 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2713 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2714 if (df_regs_ever_live_p (regno))
2716 i = regno - OUT_REG (0) + 1;
2718 #ifndef PROFILE_HOOK
2719 /* When -p profiling, we need one output register for the mcount argument.
2720 Likewise for -a profiling for the bb_init_func argument. For -ax
2721 profiling, we need two output registers for the two bb_init_trace_func
2722 arguments.  */
2726 current_frame_info.n_output_regs = i;
2728 /* ??? No rotating register support yet. */
2729 current_frame_info.n_rotate_regs = 0;
2731 /* Discover which registers need spilling, and how much room that
2732 will take. Begin with floating point and general registers,
2733 which will always wind up on the stack. */
2735 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2736 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2738 SET_HARD_REG_BIT (mask, regno);
2744 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2745 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2747 SET_HARD_REG_BIT (mask, regno);
2753 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2754 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2756 SET_HARD_REG_BIT (mask, regno);
2761 /* Now come all special registers that might get saved in other
2762 general registers. */
2764 if (frame_pointer_needed)
2766 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2767 /* If we did not get a register, then we take LOC79. This is guaranteed
2768 to be free, even if regs_ever_live is already set, because this is
2769 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2770 as we don't count loc79 above. */
2771 if (current_frame_info.r[reg_fp] == 0)
2773 current_frame_info.r[reg_fp] = LOC_REG (79);
2774 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2778 if (! current_function_is_leaf)
2780 /* Emit a save of BR0 if we call other functions. Do this even
2781 if this function doesn't return, as EH depends on this to be
2782 able to unwind the stack. */
2783 SET_HARD_REG_BIT (mask, BR_REG (0));
2785 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2786 if (current_frame_info.r[reg_save_b0] == 0)
2788 extra_spill_size += 8;
2792 /* Similarly for ar.pfs. */
2793 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2794 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2795 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2797 extra_spill_size += 8;
2801 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2802 registers are clobbered, so we fall back to the stack. */
2803 current_frame_info.r[reg_save_gp]
2804 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2805 if (current_frame_info.r[reg_save_gp] == 0)
2807 SET_HARD_REG_BIT (mask, GR_REG (1));
2814 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2816 SET_HARD_REG_BIT (mask, BR_REG (0));
2817 extra_spill_size += 8;
2821 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2823 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2824 current_frame_info.r[reg_save_ar_pfs]
2825 = find_gr_spill (reg_save_ar_pfs, 1);
2826 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2828 extra_spill_size += 8;
2834 /* Unwind descriptor hackery: things are most efficient if we allocate
2835 consecutive GR save registers for RP, PFS, FP in that order. However,
2836 it is absolutely critical that FP get the only hard register that's
2837 guaranteed to be free, so we allocated it first. If all three did
2838 happen to be allocated hard regs, and are consecutive, rearrange them
2839 into the preferred order now.
2841 If we have already emitted code for any of those registers,
2842 then it's already too late to change. */
2843 min_regno = MIN (current_frame_info.r[reg_fp],
2844 MIN (current_frame_info.r[reg_save_b0],
2845 current_frame_info.r[reg_save_ar_pfs]));
2846 max_regno = MAX (current_frame_info.r[reg_fp],
2847 MAX (current_frame_info.r[reg_save_b0],
2848 current_frame_info.r[reg_save_ar_pfs]));
2850 && min_regno + 2 == max_regno
2851 && (current_frame_info.r[reg_fp] == min_regno + 1
2852 || current_frame_info.r[reg_save_b0] == min_regno + 1
2853 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2854 && (emitted_frame_related_regs[reg_save_b0] == 0
2855 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2856 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2857 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2858 && (emitted_frame_related_regs[reg_fp] == 0
2859 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2861 current_frame_info.r[reg_save_b0] = min_regno;
2862 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2863 current_frame_info.r[reg_fp] = min_regno + 2;
2866 /* See if we need to store the predicate register block. */
2867 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2868 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2870 if (regno <= PR_REG (63))
2872 SET_HARD_REG_BIT (mask, PR_REG (0));
2873 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2874 if (current_frame_info.r[reg_save_pr] == 0)
2876 extra_spill_size += 8;
2880 /* ??? Mark them all as used so that register renaming and such
2881 are free to use them. */
2882 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2883 df_set_regs_ever_live (regno, true);
2886 /* If we're forced to use st8.spill, we're forced to save and restore
2887 ar.unat as well. The check for existing liveness allows inline asm
2888 to touch ar.unat. */
2889 if (spilled_gr_p || cfun->machine->n_varargs
2890 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2892 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2893 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2894 current_frame_info.r[reg_save_ar_unat]
2895 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2896 if (current_frame_info.r[reg_save_ar_unat] == 0)
2898 extra_spill_size += 8;
2903 if (df_regs_ever_live_p (AR_LC_REGNUM))
2905 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2906 current_frame_info.r[reg_save_ar_lc]
2907 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2908 if (current_frame_info.r[reg_save_ar_lc] == 0)
2910 extra_spill_size += 8;
2915 /* If we have an odd number of words of pretend arguments written to
2916 the stack, then the FR save area will be unaligned. We round the
2917 size of this area up to keep things 16 byte aligned. */
2919 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2921 pretend_args_size = crtl->args.pretend_args_size;
2923 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2924 + crtl->outgoing_args_size);
2925 total_size = IA64_STACK_ALIGN (total_size);
2927 /* We always use the 16-byte scratch area provided by the caller, but
2928 if we are a leaf function, there's no one to which we need to provide
2929 a scratch area.  */
2930 if (current_function_is_leaf)
2931 total_size = MAX (0, total_size - 16);
2933 current_frame_info.total_size = total_size;
2934 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2935 current_frame_info.spill_size = spill_size;
2936 current_frame_info.extra_spill_size = extra_spill_size;
2937 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2938 current_frame_info.n_spilled = n_spilled;
2939 current_frame_info.initialized = reload_completed;
2942 /* Worker function for TARGET_CAN_ELIMINATE. */
2945 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2947 return (to == BR_REG (0) ? current_function_is_leaf : true);
2950 /* Compute the initial difference between the specified pair of registers. */
2953 ia64_initial_elimination_offset (int from, int to)
2955 HOST_WIDE_INT offset;
2957 ia64_compute_frame_size (get_frame_size ());
2960 case FRAME_POINTER_REGNUM:
2963 case HARD_FRAME_POINTER_REGNUM:
2964 if (current_function_is_leaf)
2965 offset = -current_frame_info.total_size;
2967 offset = -(current_frame_info.total_size
2968 - crtl->outgoing_args_size - 16);
2971 case STACK_POINTER_REGNUM:
2972 if (current_function_is_leaf)
2975 offset = 16 + crtl->outgoing_args_size;
2983 case ARG_POINTER_REGNUM:
2984 /* Arguments start above the 16 byte save area, unless stdarg,
2985 in which case we store through the 16 byte save area.  */
2988 case HARD_FRAME_POINTER_REGNUM:
2989 offset = 16 - crtl->args.pretend_args_size;
2992 case STACK_POINTER_REGNUM:
2993 offset = (current_frame_info.total_size
2994 + 16 - crtl->args.pretend_args_size);
3009 /* If there are more than a trivial number of register spills, we use
3010 two interleaved iterators so that we can get two memory references
3011 per insn group.
3013 In order to simplify things in the prologue and epilogue expanders,
3014 we use helper functions to fix up the memory references after the
3015 fact with the appropriate offsets to a POST_MODIFY memory mode.
3016 The following data structure tracks the state of the two iterators
3017 while insns are being emitted. */
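/* For example, with both iterators live the spills alternate between
   them and each store is later rewritten into a post-modify form
   roughly like

	st8.spill [r_iter0] = r_a, 16
	st8.spill [r_iter1] = r_b, 16

   (register names illustrative; the actual increment is whatever
   distance separates that iterator's consecutive slots), letting both
   memory references issue in one insn group.  */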
3019 struct spill_fill_data
3021 rtx init_after; /* point at which to emit initializations */
3022 rtx init_reg[2]; /* initial base register */
3023 rtx iter_reg[2]; /* the iterator registers */
3024 rtx *prev_addr[2]; /* address of last memory use */
3025 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
3026 HOST_WIDE_INT prev_off[2]; /* last offset */
3027 int n_iter; /* number of iterators in use */
3028 int next_iter; /* next iterator to use */
3029 unsigned int save_gr_used_mask;
3032 static struct spill_fill_data spill_fill_data;
3035 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3039 spill_fill_data.init_after = get_last_insn ();
3040 spill_fill_data.init_reg[0] = init_reg;
3041 spill_fill_data.init_reg[1] = init_reg;
3042 spill_fill_data.prev_addr[0] = NULL;
3043 spill_fill_data.prev_addr[1] = NULL;
3044 spill_fill_data.prev_insn[0] = NULL;
3045 spill_fill_data.prev_insn[1] = NULL;
3046 spill_fill_data.prev_off[0] = cfa_off;
3047 spill_fill_data.prev_off[1] = cfa_off;
3048 spill_fill_data.next_iter = 0;
3049 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3051 spill_fill_data.n_iter = 1 + (n_spills > 2);
3052 for (i = 0; i < spill_fill_data.n_iter; ++i)
3054 int regno = next_scratch_gr_reg ();
3055 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3056 current_frame_info.gr_used_mask |= 1 << regno;
3061 finish_spill_pointers (void)
3063 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3067 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3069 int iter = spill_fill_data.next_iter;
3070 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3071 rtx disp_rtx = GEN_INT (disp);
3074 if (spill_fill_data.prev_addr[iter])
3076 if (satisfies_constraint_N (disp_rtx))
3078 *spill_fill_data.prev_addr[iter]
3079 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3080 gen_rtx_PLUS (DImode,
3081 spill_fill_data.iter_reg[iter],
3083 add_reg_note (spill_fill_data.prev_insn[iter],
3084 REG_INC, spill_fill_data.iter_reg[iter]);
3088 /* ??? Could use register post_modify for loads. */
3089 if (!satisfies_constraint_I (disp_rtx))
3091 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3092 emit_move_insn (tmp, disp_rtx);
3095 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3096 spill_fill_data.iter_reg[iter], disp_rtx));
3099 /* Micro-optimization: if we've created a frame pointer, it's at
3100 CFA 0, which may allow the real iterator to be initialized lower,
3101 slightly increasing parallelism. Also, if there are few saves
3102 it may eliminate the iterator entirely. */
3104 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3105 && frame_pointer_needed)
3107 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3108 set_mem_alias_set (mem, get_varargs_alias_set ());
3116 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3117 spill_fill_data.init_reg[iter]);
3122 if (!satisfies_constraint_I (disp_rtx))
3124 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3125 emit_move_insn (tmp, disp_rtx);
3129 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3130 spill_fill_data.init_reg[iter],
3137 /* Careful for being the first insn in a sequence. */
3138 if (spill_fill_data.init_after)
3139 insn = emit_insn_after (seq, spill_fill_data.init_after);
3142 rtx first = get_insns ();
3144 insn = emit_insn_before (seq, first);
3146 insn = emit_insn (seq);
3148 spill_fill_data.init_after = insn;
3151 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3153 /* ??? Not all of the spills are for varargs, but some of them are.
3154 The rest of the spills belong in an alias set of their own. But
3155 it doesn't actually hurt to include them here. */
3156 set_mem_alias_set (mem, get_varargs_alias_set ());
3158 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3159 spill_fill_data.prev_off[iter] = cfa_off;
3161 if (++iter >= spill_fill_data.n_iter)
3163 spill_fill_data.next_iter = iter;
3169 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3172 int iter = spill_fill_data.next_iter;
3175 mem = spill_restore_mem (reg, cfa_off);
3176 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3177 spill_fill_data.prev_insn[iter] = insn;
3184 RTX_FRAME_RELATED_P (insn) = 1;
3186 /* Don't even pretend that the unwind code can intuit its way
3187 through a pair of interleaved post_modify iterators. Just
3188 provide the correct answer. */
3190 if (frame_pointer_needed)
3192 base = hard_frame_pointer_rtx;
3197 base = stack_pointer_rtx;
3198 off = current_frame_info.total_size - cfa_off;
3201 add_reg_note (insn, REG_CFA_OFFSET,
3202 gen_rtx_SET (VOIDmode,
3203 gen_rtx_MEM (GET_MODE (reg),
3204 plus_constant (base, off)),
3210 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3212 int iter = spill_fill_data.next_iter;
3215 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3216 GEN_INT (cfa_off)));
3217 spill_fill_data.prev_insn[iter] = insn;
3220 /* Wrapper functions that discard the CONST_INT spill offset.  These
3221 exist so that we can give gr_spill/gr_fill the offset they need and
3222 use a consistent function interface. */
3225 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3227 return gen_movdi (dest, src);
3231 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3233 return gen_fr_spill (dest, src);
3237 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3239 return gen_fr_restore (dest, src);
3242 /* Called after register allocation to add any instructions needed for the
3243 prologue.  Using a prologue insn is favored over putting all of the
3244 instructions in output_function_prologue(), since it allows the scheduler
3245 to intermix instructions with the saves of the caller saved registers. In
3246 some cases, it might be necessary to emit a barrier instruction as the last
3247 insn to prevent such scheduling.
3249 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3250 so that the debug info generation code can handle them properly.
3252 The register save area is laid out like so:
3254 [ varargs spill area ]
3255 [ fr register spill area ]
3256 [ br register spill area ]
3257 [ ar register spill area ]
3258 [ pr register spill area ]
3259 [ gr register spill area ] */
3261 /* ??? Get inefficient code when the frame size is larger than can fit in an
3262 adds instruction. */
3265 ia64_expand_prologue (void)
3267 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3268 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3271 ia64_compute_frame_size (get_frame_size ());
3272 last_scratch_gr_reg = 15;
3274 if (flag_stack_usage_info)
3275 current_function_static_stack_size = current_frame_info.total_size;
3279 fprintf (dump_file, "ia64 frame related registers "
3280 "recorded in current_frame_info.r[]:\n");
3281 #define PRINTREG(a) if (current_frame_info.r[a]) \
3282 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3284 PRINTREG(reg_save_b0);
3285 PRINTREG(reg_save_pr);
3286 PRINTREG(reg_save_ar_pfs);
3287 PRINTREG(reg_save_ar_unat);
3288 PRINTREG(reg_save_ar_lc);
3289 PRINTREG(reg_save_gp);
3293 /* If there is no epilogue, then we don't need some prologue insns.
3294 We need to avoid emitting the dead prologue insns, because flow
3295 will complain about them. */
3301 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3302 if ((e->flags & EDGE_FAKE) == 0
3303 && (e->flags & EDGE_FALLTHRU) != 0)
3305 epilogue_p = (e != NULL);
3310 /* Set the local, input, and output register names. We need to do this
3311 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3312 half. If we use in/loc/out register names, then we get assembler errors
3313 in crtn.S because there is no alloc insn or regstk directive in there. */
3314 if (! TARGET_REG_NAMES)
3316 int inputs = current_frame_info.n_input_regs;
3317 int locals = current_frame_info.n_local_regs;
3318 int outputs = current_frame_info.n_output_regs;
3320 for (i = 0; i < inputs; i++)
3321 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3322 for (i = 0; i < locals; i++)
3323 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3324 for (i = 0; i < outputs; i++)
3325 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3328 /* Set the frame pointer register name. The regnum is logically loc79,
3329 but of course we'll not have allocated that many locals. Rather than
3330 worrying about renumbering the existing rtxs, we adjust the name. */
3331 /* ??? This code means that we can never use one local register when
3332 there is a frame pointer. loc79 gets wasted in this case, as it is
3333 renamed to a register that will never be used. See also the try_locals
3334 code in find_gr_spill. */
3335 if (current_frame_info.r[reg_fp])
3337 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3338 reg_names[HARD_FRAME_POINTER_REGNUM]
3339 = reg_names[current_frame_info.r[reg_fp]];
3340 reg_names[current_frame_info.r[reg_fp]] = tmp;
3343 /* We don't need an alloc instruction if we've used no outputs or locals. */
3344 if (current_frame_info.n_local_regs == 0
3345 && current_frame_info.n_output_regs == 0
3346 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3347 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3349 /* If there is no alloc, but there are input registers used, then we
3350 need a .regstk directive. */
3351 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3352 ar_pfs_save_reg = NULL_RTX;
3356 current_frame_info.need_regstk = 0;
3358 if (current_frame_info.r[reg_save_ar_pfs])
3360 regno = current_frame_info.r[reg_save_ar_pfs];
3361 reg_emitted (reg_save_ar_pfs);
3364 regno = next_scratch_gr_reg ();
3365 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3367 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3368 GEN_INT (current_frame_info.n_input_regs),
3369 GEN_INT (current_frame_info.n_local_regs),
3370 GEN_INT (current_frame_info.n_output_regs),
3371 GEN_INT (current_frame_info.n_rotate_regs)));
3372 if (current_frame_info.r[reg_save_ar_pfs])
3374 RTX_FRAME_RELATED_P (insn) = 1;
3375 add_reg_note (insn, REG_CFA_REGISTER,
3376 gen_rtx_SET (VOIDmode,
3378 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3382 /* Set up frame pointer, stack pointer, and spill iterators. */
3384 n_varargs = cfun->machine->n_varargs;
3385 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3386 stack_pointer_rtx, 0);
3388 if (frame_pointer_needed)
3390 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3391 RTX_FRAME_RELATED_P (insn) = 1;
3393 /* Force the unwind info to recognize this as defining a new CFA,
3394 rather than some temp register setup. */
3395 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3398 if (current_frame_info.total_size != 0)
3400 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3403 if (satisfies_constraint_I (frame_size_rtx))
3404 offset = frame_size_rtx;
3407 regno = next_scratch_gr_reg ();
3408 offset = gen_rtx_REG (DImode, regno);
3409 emit_move_insn (offset, frame_size_rtx);
3412 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3413 stack_pointer_rtx, offset));
3415 if (! frame_pointer_needed)
3417 RTX_FRAME_RELATED_P (insn) = 1;
3418 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3419 gen_rtx_SET (VOIDmode,
3421 gen_rtx_PLUS (DImode,
3426 /* ??? At this point we must generate a magic insn that appears to
3427 modify the stack pointer, the frame pointer, and all spill
3428 iterators. This would allow the most scheduling freedom. For
3429 now, just hard stop. */
3430 emit_insn (gen_blockage ());
3433 /* Must copy out ar.unat before doing any integer spills. */
3434 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3436 if (current_frame_info.r[reg_save_ar_unat])
3439 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3440 reg_emitted (reg_save_ar_unat);
3444 alt_regno = next_scratch_gr_reg ();
3445 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3446 current_frame_info.gr_used_mask |= 1 << alt_regno;
3449 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3450 insn = emit_move_insn (ar_unat_save_reg, reg);
3451 if (current_frame_info.r[reg_save_ar_unat])
3453 RTX_FRAME_RELATED_P (insn) = 1;
3454 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3457 /* Even if we're not going to generate an epilogue, we still
3458 need to save the register so that EH works. */
3459 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3460 emit_insn (gen_prologue_use (ar_unat_save_reg));
3463 ar_unat_save_reg = NULL_RTX;
3465 /* Spill all varargs registers. Do this before spilling any GR registers,
3466 since we want the UNAT bits for the GR registers to override the UNAT
3467 bits from varargs, which we don't care about. */
3470 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3472 reg = gen_rtx_REG (DImode, regno);
3473 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3476 /* Locate the bottom of the register save area. */
3477 cfa_off = (current_frame_info.spill_cfa_off
3478 + current_frame_info.spill_size
3479 + current_frame_info.extra_spill_size);
3481 /* Save the predicate register block either in a register or in memory. */
3482 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3484 reg = gen_rtx_REG (DImode, PR_REG (0));
3485 if (current_frame_info.r[reg_save_pr] != 0)
3487 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3488 reg_emitted (reg_save_pr);
3489 insn = emit_move_insn (alt_reg, reg);
3491 /* ??? Denote pr spill/fill by a DImode move that modifies all
3492 64 hard registers. */
3493 RTX_FRAME_RELATED_P (insn) = 1;
3494 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3496 /* Even if we're not going to generate an epilogue, we still
3497 need to save the register so that EH works. */
3499 emit_insn (gen_prologue_use (alt_reg));
3503 alt_regno = next_scratch_gr_reg ();
3504 alt_reg = gen_rtx_REG (DImode, alt_regno);
3505 insn = emit_move_insn (alt_reg, reg);
3506 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3511 /* Handle AR regs in numerical order. All of them get special handling. */
3512 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3513 && current_frame_info.r[reg_save_ar_unat] == 0)
3515 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3516 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3520 /* The alloc insn already copied ar.pfs into a general register. The
3521 only thing we have to do now is copy that register to a stack slot
3522 if we'd not allocated a local register for the job. */
3523 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3524 && current_frame_info.r[reg_save_ar_pfs] == 0)
3526 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3527 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3531 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3533 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3534 if (current_frame_info.r[reg_save_ar_lc] != 0)
3536 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3537 reg_emitted (reg_save_ar_lc);
3538 insn = emit_move_insn (alt_reg, reg);
3539 RTX_FRAME_RELATED_P (insn) = 1;
3540 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3542 /* Even if we're not going to generate an epilogue, we still
3543 need to save the register so that EH works. */
3545 emit_insn (gen_prologue_use (alt_reg));
3549 alt_regno = next_scratch_gr_reg ();
3550 alt_reg = gen_rtx_REG (DImode, alt_regno);
3551 emit_move_insn (alt_reg, reg);
3552 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3557 /* Save the return pointer. */
3558 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3560 reg = gen_rtx_REG (DImode, BR_REG (0));
3561 if (current_frame_info.r[reg_save_b0] != 0)
3563 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3564 reg_emitted (reg_save_b0);
3565 insn = emit_move_insn (alt_reg, reg);
3566 RTX_FRAME_RELATED_P (insn) = 1;
3567 add_reg_note (insn, REG_CFA_REGISTER,
3568 gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
3570 /* Even if we're not going to generate an epilogue, we still
3571 need to save the register so that EH works. */
3573 emit_insn (gen_prologue_use (alt_reg));
3577 alt_regno = next_scratch_gr_reg ();
3578 alt_reg = gen_rtx_REG (DImode, alt_regno);
3579 emit_move_insn (alt_reg, reg);
3580 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3585 if (current_frame_info.r[reg_save_gp])
3587 reg_emitted (reg_save_gp);
3588 insn = emit_move_insn (gen_rtx_REG (DImode,
3589 current_frame_info.r[reg_save_gp]),
3590 pic_offset_table_rtx);
3593 /* We should now be at the base of the gr/br/fr spill area. */
3594 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3595 + current_frame_info.spill_size));
3597 /* Spill all general registers. */
3598 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3599 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3601 reg = gen_rtx_REG (DImode, regno);
3602 do_spill (gen_gr_spill, reg, cfa_off, reg);
3606 /* Spill the rest of the BR registers. */
3607 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3608 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3610 alt_regno = next_scratch_gr_reg ();
3611 alt_reg = gen_rtx_REG (DImode, alt_regno);
3612 reg = gen_rtx_REG (DImode, regno);
3613 emit_move_insn (alt_reg, reg);
3614 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3618 /* Align the frame and spill all FR registers. */
3619 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3620 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3622 gcc_assert (!(cfa_off & 15));
3623 reg = gen_rtx_REG (XFmode, regno);
3624 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3628 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3630 finish_spill_pointers ();
3633 /* Output the textual info surrounding the prologue. */
3636 ia64_start_function (FILE *file, const char *fnname,
3637 tree decl ATTRIBUTE_UNUSED)
3639 #if VMS_DEBUGGING_INFO
3641 && debug_info_level > DINFO_LEVEL_NONE
3642 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3644 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3645 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3646 dwarf2out_vms_debug_main_pointer ();
3651 fputs ("\t.proc ", file);
3652 assemble_name (file, fnname);
3654 ASM_OUTPUT_LABEL (file, fnname);
3657 /* Called after register allocation to add any instructions needed for the
3658 epilogue.  Using an epilogue insn is favored over putting all of the
3659 instructions in output_function_epilogue(), since it allows the scheduler
3660 to intermix instructions with the saves of the caller saved registers. In
3661 some cases, it might be necessary to emit a barrier instruction as the last
3662 insn to prevent such scheduling. */
3665 ia64_expand_epilogue (int sibcall_p)
3667 rtx insn, reg, alt_reg, ar_unat_save_reg;
3668 int regno, alt_regno, cfa_off;
3670 ia64_compute_frame_size (get_frame_size ());
3672 /* If there is a frame pointer, then we use it instead of the stack
3673 pointer, so that the stack pointer does not need to be valid when
3674 the epilogue starts. See EXIT_IGNORE_STACK. */
3675 if (frame_pointer_needed)
3676 setup_spill_pointers (current_frame_info.n_spilled,
3677 hard_frame_pointer_rtx, 0);
3679 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3680 current_frame_info.total_size);
3682 if (current_frame_info.total_size != 0)
3684 /* ??? At this point we must generate a magic insn that appears to
3685 modify the spill iterators and the frame pointer. This would
3686 allow the most scheduling freedom. For now, just hard stop. */
3687 emit_insn (gen_blockage ());
3690 /* Locate the bottom of the register save area. */
3691 cfa_off = (current_frame_info.spill_cfa_off
3692 + current_frame_info.spill_size
3693 + current_frame_info.extra_spill_size);
3695 /* Restore the predicate registers. */
3696 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3698 if (current_frame_info.r[reg_save_pr] != 0)
3700 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3701 reg_emitted (reg_save_pr);
3705 alt_regno = next_scratch_gr_reg ();
3706 alt_reg = gen_rtx_REG (DImode, alt_regno);
3707 do_restore (gen_movdi_x, alt_reg, cfa_off);
3710 reg = gen_rtx_REG (DImode, PR_REG (0));
3711 emit_move_insn (reg, alt_reg);
3714 /* Restore the application registers. */
3716 /* Load the saved unat from the stack, but do not restore it until
3717 after the GRs have been restored. */
3718 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3720 if (current_frame_info.r[reg_save_ar_unat] != 0)
3723 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3724 reg_emitted (reg_save_ar_unat);
3728 alt_regno = next_scratch_gr_reg ();
3729 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3730 current_frame_info.gr_used_mask |= 1 << alt_regno;
3731 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3736 ar_unat_save_reg = NULL_RTX;
3738 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3740 reg_emitted (reg_save_ar_pfs);
3741 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3742 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3743 emit_move_insn (reg, alt_reg);
3745 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3747 alt_regno = next_scratch_gr_reg ();
3748 alt_reg = gen_rtx_REG (DImode, alt_regno);
3749 do_restore (gen_movdi_x, alt_reg, cfa_off);
3751 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3752 emit_move_insn (reg, alt_reg);
3755 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3757 if (current_frame_info.r[reg_save_ar_lc] != 0)
3759 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3760 reg_emitted (reg_save_ar_lc);
3764 alt_regno = next_scratch_gr_reg ();
3765 alt_reg = gen_rtx_REG (DImode, alt_regno);
3766 do_restore (gen_movdi_x, alt_reg, cfa_off);
3769 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3770 emit_move_insn (reg, alt_reg);
3773 /* Restore the return pointer. */
3774 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3776 if (current_frame_info.r[reg_save_b0] != 0)
3778 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3779 reg_emitted (reg_save_b0);
3783 alt_regno = next_scratch_gr_reg ();
3784 alt_reg = gen_rtx_REG (DImode, alt_regno);
3785 do_restore (gen_movdi_x, alt_reg, cfa_off);
3788 reg = gen_rtx_REG (DImode, BR_REG (0));
3789 emit_move_insn (reg, alt_reg);
3792 /* We should now be at the base of the gr/br/fr spill area. */
3793 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3794 + current_frame_info.spill_size));
3796 /* The GP may be stored on the stack in the prologue, but it's
3797 never restored in the epilogue. Skip the stack slot. */
3798 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3801 /* Restore all general registers. */
3802 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3803 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3805 reg = gen_rtx_REG (DImode, regno);
3806 do_restore (gen_gr_restore, reg, cfa_off);
3810 /* Restore the branch registers. */
3811 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3812 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3814 alt_regno = next_scratch_gr_reg ();
3815 alt_reg = gen_rtx_REG (DImode, alt_regno);
3816 do_restore (gen_movdi_x, alt_reg, cfa_off);
3818 reg = gen_rtx_REG (DImode, regno);
3819 emit_move_insn (reg, alt_reg);
3822 /* Restore floating point registers. */
3823 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3824 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3826 gcc_assert (!(cfa_off & 15));
3827 reg = gen_rtx_REG (XFmode, regno);
3828 do_restore (gen_fr_restore_x, reg, cfa_off);
3832 /* Restore ar.unat for real. */
3833 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3835 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3836 emit_move_insn (reg, ar_unat_save_reg);
3839 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3841 finish_spill_pointers ();
3843 if (current_frame_info.total_size
3844 || cfun->machine->ia64_eh_epilogue_sp
3845 || frame_pointer_needed)
3847 /* ??? At this point we must generate a magic insn that appears to
3848 modify the spill iterators, the stack pointer, and the frame
3849 pointer.  This would allow the most scheduling freedom.  For now,
3850 just hard stop.  */
3851 emit_insn (gen_blockage ());
3854 if (cfun->machine->ia64_eh_epilogue_sp)
3855 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3856 else if (frame_pointer_needed)
3858 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3859 RTX_FRAME_RELATED_P (insn) = 1;
3860 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3862 else if (current_frame_info.total_size)
3864 rtx offset, frame_size_rtx;
3866 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3867 if (satisfies_constraint_I (frame_size_rtx))
3868 offset = frame_size_rtx;
3871 regno = next_scratch_gr_reg ();
3872 offset = gen_rtx_REG (DImode, regno);
3873 emit_move_insn (offset, frame_size_rtx);
3876 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3879 RTX_FRAME_RELATED_P (insn) = 1;
3880 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3881 gen_rtx_SET (VOIDmode,
3883 gen_rtx_PLUS (DImode,
3888 if (cfun->machine->ia64_eh_epilogue_bsp)
3889 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3892 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3895 int fp = GR_REG (2);
3896 /* We need a throwaway register here; r0 and r1 are reserved,
3897 so r2 is the first available call-clobbered register.  If
3898 there was a frame_pointer register, we may have swapped the
3899 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3900 sure we're using the string "r2" when emitting the register
3901 name for the assembler. */
3902 if (current_frame_info.r[reg_fp]
3903 && current_frame_info.r[reg_fp] == GR_REG (2))
3904 fp = HARD_FRAME_POINTER_REGNUM;
3906 /* We must emit an alloc to force the input registers to become output
3907 registers. Otherwise, if the callee tries to pass its parameters
3908 through to another call without an intervening alloc, then these
3909 values may be clobbered.  */
3910 /* ??? We don't need to preserve all input registers. We only need to
3911 preserve those input registers used as arguments to the sibling call.
3912 It is unclear how to compute that number here. */
3913 if (current_frame_info.n_input_regs != 0)
3915 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3917 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3918 const0_rtx, const0_rtx,
3919 n_inputs, const0_rtx));
3920 RTX_FRAME_RELATED_P (insn) = 1;
3922 /* ??? We need to mark the alloc as frame-related so that it gets
3923 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
3924 But there's nothing dwarf2 related to be done wrt the register
3925 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
3926 the empty parallel means dwarf2out will not see anything. */
3927 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3928 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
3933 /* Return 1 if br.ret can do all the work required to return from a
3934 function.  */
3937 ia64_direct_return (void)
3939 if (reload_completed && ! frame_pointer_needed)
3941 ia64_compute_frame_size (get_frame_size ());
3943 return (current_frame_info.total_size == 0
3944 && current_frame_info.n_spilled == 0
3945 && current_frame_info.r[reg_save_b0] == 0
3946 && current_frame_info.r[reg_save_pr] == 0
3947 && current_frame_info.r[reg_save_ar_pfs] == 0
3948 && current_frame_info.r[reg_save_ar_unat] == 0
3949 && current_frame_info.r[reg_save_ar_lc] == 0);
3954 /* Return the magic cookie that we use to hold the return address
3955 during early compilation. */
3958 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3962 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3965 /* Split this value after reload, now that we know where the return
3966 address is saved. */
3969 ia64_split_return_addr_rtx (rtx dest)
3973 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3975 if (current_frame_info.r[reg_save_b0] != 0)
3977 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3978 reg_emitted (reg_save_b0);
3986 /* Compute offset from CFA for BR0. */
3987 /* ??? Must be kept in sync with ia64_expand_prologue. */
3988 off = (current_frame_info.spill_cfa_off
3989 + current_frame_info.spill_size);
3990 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3991 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3994 /* Convert CFA offset to a register based offset. */
3995 if (frame_pointer_needed)
3996 src = hard_frame_pointer_rtx;
3999 src = stack_pointer_rtx;
4000 off += current_frame_info.total_size;
4003 /* Load address into scratch register. */
4004 off_r = GEN_INT (off);
4005 if (satisfies_constraint_I (off_r))
4006 emit_insn (gen_adddi3 (dest, src, off_r));
4009 emit_move_insn (dest, off_r);
4010 emit_insn (gen_adddi3 (dest, src, dest));
4013 src = gen_rtx_MEM (Pmode, dest);
4017 src = gen_rtx_REG (DImode, BR_REG (0));
4019 emit_move_insn (dest, src);
4023 ia64_hard_regno_rename_ok (int from, int to)
4025 /* Don't clobber any of the registers we reserved for the prologue. */
4028 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4029 if (to == current_frame_info.r[r]
4030 || from == current_frame_info.r[r]
4031 || to == emitted_frame_related_regs[r]
4032 || from == emitted_frame_related_regs[r])
4035 /* Don't use output registers outside the register frame. */
4036 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4039 /* Retain even/oddness on predicate register pairs. */
4040 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4041 return (from & 1) == (to & 1);
4046 /* Target hook for assembling integer objects. Handle word-sized
4047 aligned objects and detect the cases when @fptr is needed. */
4050 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4052 if (size == POINTER_SIZE / BITS_PER_UNIT
4053 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4054 && GET_CODE (x) == SYMBOL_REF
4055 && SYMBOL_REF_FUNCTION_P (x))
4057 static const char * const directive[2][2] = {
4058 /* 64-bit pointer */ /* 32-bit pointer */
4059 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4060 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4062 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4063 output_addr_const (asm_out_file, x);
4064 fputs (")\n", asm_out_file);
4067 return default_assemble_integer (x, size, aligned_p);
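/* So initializing an aligned 64-bit function pointer with the address
   of foo comes out as "data8 @fptr(foo)", directing the linker to
   supply the address of foo's official function descriptor rather than
   its raw code address.  */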
4070 /* Emit the function prologue. */
4073 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4075 int mask, grsave, grsave_prev;
4077 if (current_frame_info.need_regstk)
4078 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4079 current_frame_info.n_input_regs,
4080 current_frame_info.n_local_regs,
4081 current_frame_info.n_output_regs,
4082 current_frame_info.n_rotate_regs);
4084 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4087 /* Emit the .prologue directive. */
4090 grsave = grsave_prev = 0;
4091 if (current_frame_info.r[reg_save_b0] != 0)
4094 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4096 if (current_frame_info.r[reg_save_ar_pfs] != 0
4097 && (grsave_prev == 0
4098 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4101 if (grsave_prev == 0)
4102 grsave = current_frame_info.r[reg_save_ar_pfs];
4103 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4105 if (current_frame_info.r[reg_fp] != 0
4106 && (grsave_prev == 0
4107 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4110 if (grsave_prev == 0)
4111 grsave = HARD_FRAME_POINTER_REGNUM;
4112 grsave_prev = current_frame_info.r[reg_fp];
4114 if (current_frame_info.r[reg_save_pr] != 0
4115 && (grsave_prev == 0
4116 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4119 if (grsave_prev == 0)
4120 grsave = current_frame_info.r[reg_save_pr];
4123 if (mask && TARGET_GNU_AS)
4124 fprintf (file, "\t.prologue %d, %d\n", mask,
4125 ia64_dbx_register_number (grsave));
4127 fputs ("\t.prologue\n", file);
4129 /* Emit a .spill directive, if necessary, to relocate the base of
4130 the register spill area. */
4131 if (current_frame_info.spill_cfa_off != -16)
4132 fprintf (file, "\t.spill %ld\n",
4133 (long) (current_frame_info.spill_cfa_off
4134 + current_frame_info.spill_size));
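/* Illustrative output only (the numbers are hypothetical): for a
   frame using 2 input, 3 local and 1 output registers, with the
   prologue saves landing in consecutive GRs, the hook above prints
   directives along the lines of

	.regstk 2, 3, 1, 0
	.prologue 12, 32

   where the .prologue operands are the MASK of saved resources and
   the debugger number of GRSAVE, as computed in the code above.  */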
4137 /* Emit the .body directive at the scheduled end of the prologue. */
4140 ia64_output_function_end_prologue (FILE *file)
4142 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4145 fputs ("\t.body\n", file);
4148 /* Emit the function epilogue. */
4151 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4152 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4156 if (current_frame_info.r[reg_fp])
4158 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4159 reg_names[HARD_FRAME_POINTER_REGNUM]
4160 = reg_names[current_frame_info.r[reg_fp]];
4161 reg_names[current_frame_info.r[reg_fp]] = tmp;
4162 reg_emitted (reg_fp);
4164 if (! TARGET_REG_NAMES)
4166 for (i = 0; i < current_frame_info.n_input_regs; i++)
4167 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4168 for (i = 0; i < current_frame_info.n_local_regs; i++)
4169 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4170 for (i = 0; i < current_frame_info.n_output_regs; i++)
4171 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4174 current_frame_info.initialized = 0;
4178 ia64_dbx_register_number (int regno)
4180 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4181 from its home at loc79 to something inside the register frame. We
4182 must perform the same renumbering here for the debug info. */
4183 if (current_frame_info.r[reg_fp])
4185 if (regno == HARD_FRAME_POINTER_REGNUM)
4186 regno = current_frame_info.r[reg_fp];
4187 else if (regno == current_frame_info.r[reg_fp])
4188 regno = HARD_FRAME_POINTER_REGNUM;
4191 if (IN_REGNO_P (regno))
4192 return 32 + regno - IN_REG (0);
4193 else if (LOC_REGNO_P (regno))
4194 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4195 else if (OUT_REGNO_P (regno))
4196 return (32 + current_frame_info.n_input_regs
4197 + current_frame_info.n_local_regs + regno - OUT_REG (0));
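/* A worked example (hypothetical frame): with n_input_regs == 2 and
   n_local_regs == 3, the stacked registers are renumbered densely
   for the debug info:

     in0  (IN_REG (0))  -> 32
     in1  (IN_REG (1))  -> 33
     loc0 (LOC_REG (0)) -> 34    32 + 2 inputs
     out0 (OUT_REG (0)) -> 37    32 + 2 inputs + 3 locals  */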
4202 /* Implement TARGET_TRAMPOLINE_INIT.
4204 The trampoline should set the static chain pointer to the value placed
4205 into the trampoline and should branch to the specified routine.
4206 To make the normal indirect-subroutine calling convention work,
4207 the trampoline must look like a function descriptor; the first
4208 word being the target address and the second being the target's
4209 global pointer.
4211 We abuse the concept of a global pointer by arranging for it
4212 to point to the data we need to load. The complete trampoline
4213 has the following form:
4215 +-------------------+ \
4216 TRAMP: | __ia64_trampoline | |
4217 +-------------------+ > fake function descriptor
4218 | TRAMP+16          | |
4219 +-------------------+ /
4220 | target descriptor |
4221 +-------------------+
4222 | static link       |
4223 +-------------------+
4227 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4229 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4230 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4232 /* The Intel assembler requires that the global __ia64_trampoline symbol
4233 be declared explicitly */
4236 static bool declared_ia64_trampoline = false;
4238 if (!declared_ia64_trampoline)
4240 declared_ia64_trampoline = true;
4241 (*targetm.asm_out.globalize_label) (asm_out_file,
4242 "__ia64_trampoline");
4246 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4247 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4248 fnaddr = convert_memory_address (Pmode, fnaddr);
4249 static_chain = convert_memory_address (Pmode, static_chain);
4251 /* Load up our iterator. */
4252 addr_reg = copy_to_reg (addr);
4253 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4255 /* The first two words are the fake descriptor:
4256 __ia64_trampoline, ADDR+16. */
4257 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4258 if (TARGET_ABI_OPEN_VMS)
4260 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4261 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4262 relocation against function symbols to make it identical to the
4263 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4264 strict ELF and dereference to get the bare code address. */
4265 rtx reg = gen_reg_rtx (Pmode);
4266 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4267 emit_move_insn (reg, tramp);
4268 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4271 emit_move_insn (m_tramp, tramp);
4272 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4273 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4275 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4276 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4277 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4279 /* The third word is the target descriptor. */
4280 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4281 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4282 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4284 /* The fourth word is the static chain. */
4285 emit_move_insn (m_tramp, static_chain);
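/* Illustration only (generic GCC behavior, not text from this file):
   a trampoline is built whenever the address of a nested function
   escapes, e.g.

     int
     outer (int x)
     {
       int inner (int y) { return x + y; }   // uses the static chain
       int (*fp) (int) = inner;              // forces a trampoline
       return fp (1);
     }

   M_TRAMP above is then initialized with the four words pictured in
   the diagram before this function.  */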
4288 /* Do any needed setup for a variadic function. CUM has not been updated
4289 for the last named argument which has type TYPE and mode MODE.
4291 We generate the actual spill instructions during prologue generation. */
4294 ia64_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4295 tree type, int * pretend_size,
4296 int second_time ATTRIBUTE_UNUSED)
4298 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4300 /* Skip the current argument. */
4301 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4303 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4305 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4306 *pretend_size = n * UNITS_PER_WORD;
4307 cfun->machine->n_varargs = n;
4311 /* Check whether TYPE is a homogeneous floating point aggregate. If
4312 it is, return the mode of the floating point type that appears
4313 in all leaves.  If it is not, return VOIDmode.
4315 An aggregate is a homogeneous floating point aggregate if all
4316 fields/elements in it have the same floating point type (e.g.,
4317 SFmode).  128-bit quad-precision floats are excluded.
4319 Variable sized aggregates should never arrive here, since we should
4320 have already decided to pass them by reference. Top-level zero-sized
4321 aggregates are excluded because our parallels crash the middle-end. */
4323 static enum machine_mode
4324 hfa_element_mode (const_tree type, bool nested)
4326 enum machine_mode element_mode = VOIDmode;
4327 enum machine_mode mode;
4328 enum tree_code code = TREE_CODE (type);
4329 int know_element_mode = 0;
4332 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4337 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4338 case BOOLEAN_TYPE: case POINTER_TYPE:
4339 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4340 case LANG_TYPE: case FUNCTION_TYPE:
4343 /* Fortran complex types are supposed to be HFAs, so we need to handle
4344 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4345 types though.  */
4347 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4348 && TYPE_MODE (type) != TCmode)
4349 return GET_MODE_INNER (TYPE_MODE (type));
4354 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4355 mode if this is contained within an aggregate. */
4356 if (nested && TYPE_MODE (type) != TFmode)
4357 return TYPE_MODE (type);
4362 return hfa_element_mode (TREE_TYPE (type), 1);
4366 case QUAL_UNION_TYPE:
4367 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4369 if (TREE_CODE (t) != FIELD_DECL)
4372 mode = hfa_element_mode (TREE_TYPE (t), 1);
4373 if (know_element_mode)
4375 if (mode != element_mode)
4378 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4382 know_element_mode = 1;
4383 element_mode = mode;
4386 return element_mode;
4389 /* If we reach here, we probably have some front-end specific type
4390 that the backend doesn't know about. This can happen via the
4391 aggregate_value_p call in init_function_start. All we can do is
4392 ignore unknown tree types. */
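/* Illustrative examples of the rules above (made-up type names, and
   assuming __float128 has TFmode here):

     struct hfa  { float x, y, z; };      // HFA, element mode SFmode
     struct hfa2 { double d[4]; };        // HFA, element mode DFmode
     struct mix  { float x; double y; };  // not an HFA: mixed leaves
     struct quad { __float128 q; };       // not an HFA: quad excluded

   hfa_element_mode returns SFmode, DFmode, VOIDmode and VOIDmode
   respectively for these.  */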
4399 /* Return the number of words required to hold a quantity of TYPE and MODE
4400 when passed as an argument. */
4402 ia64_function_arg_words (const_tree type, enum machine_mode mode)
4406 if (mode == BLKmode)
4407 words = int_size_in_bytes (type);
4409 words = GET_MODE_SIZE (mode);
4411 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
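/* For example (illustrative sizes): a 12-byte BLKmode aggregate needs
   (12 + 8 - 1) / 8 == 2 argument words, while an 8-byte DImode value
   needs exactly one.  */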
4414 /* Return the number of registers that should be skipped so the current
4415 argument (described by TYPE and WORDS) will be properly aligned.
4417 Integer and float arguments larger than 8 bytes start at the next
4418 even boundary. Aggregates larger than 8 bytes start at the next
4419 even boundary if the aggregate has 16 byte alignment. Note that
4420 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4421 but are still to be aligned in registers.
4423 ??? The ABI does not specify how to handle aggregates with
4424 alignment from 9 to 15 bytes, or greater than 16. We handle them
4425 all as if they had 16 byte alignment. Such aggregates can occur
4426 only if gcc extensions are used. */
4428 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4429 const_tree type, int words)
4431 /* No registers are skipped on VMS. */
4432 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4436 && TREE_CODE (type) != INTEGER_TYPE
4437 && TREE_CODE (type) != REAL_TYPE)
4438 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
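/* For example (illustrative): if CUM->WORDS is odd and the next
   argument is an aggregate with 16-byte alignment, we return 1 so the
   aggregate starts in an even slot; on VMS, or when CUM->WORDS is
   already even, no slot is skipped and we return 0.  */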
4443 /* Return rtx for register where argument is passed, or zero if it is passed
4444 on the stack.  */
4445 /* ??? 128-bit quad-precision floats are always passed in general
4446 registers.  */
4449 ia64_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
4450 const_tree type, bool named, bool incoming)
4452 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4454 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4455 int words = ia64_function_arg_words (type, mode);
4456 int offset = ia64_function_arg_offset (cum, type, words);
4457 enum machine_mode hfa_mode = VOIDmode;
4459 /* For OpenVMS, emit the instruction setting up the argument register here,
4460 when we know it will be emitted together with the other argument-setup
4461 insns.  This is not conceptually the best place to do this, but it is
4462 the easiest, as we have convenient access to the cumulative args info.  */
4464 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4467 unsigned HOST_WIDE_INT regval = cum->words;
4470 for (i = 0; i < 8; i++)
4471 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4473 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4477 /* If all argument slots are used, then it must go on the stack. */
4478 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4481 /* Check for and handle homogeneous FP aggregates. */
4483 hfa_mode = hfa_element_mode (type, 0);
4485 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4486 and unprototyped hfas are passed specially. */
4487 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4491 int fp_regs = cum->fp_regs;
4492 int int_regs = cum->words + offset;
4493 int hfa_size = GET_MODE_SIZE (hfa_mode);
4497 /* If prototyped, pass it in FR regs then GR regs.
4498 If not prototyped, pass it in both FR and GR regs.
4500 If this is an SFmode aggregate, then it is possible to run out of
4501 FR regs while GR regs are still left. In that case, we pass the
4502 remaining part in the GR regs. */
4504 /* Fill the FP regs. We do this always. We stop if we reach the end
4505 of the argument, the last FP register, or the last argument slot. */
4507 byte_size = ((mode == BLKmode)
4508 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4509 args_byte_size = int_regs * UNITS_PER_WORD;
4511 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4512 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4514 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4515 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4519 args_byte_size += hfa_size;
4523 /* If no prototype, then the whole thing must go in GR regs. */
4524 if (! cum->prototype)
4526 /* If this is an SFmode aggregate, then we might have some left over
4527 that needs to go in GR regs. */
4528 else if (byte_size != offset)
4529 int_regs += offset / UNITS_PER_WORD;
4531 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4533 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4535 enum machine_mode gr_mode = DImode;
4536 unsigned int gr_size;
4538 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4539 then this goes in a GR reg left adjusted/little endian, right
4540 adjusted/big endian. */
4541 /* ??? Currently this is handled wrong, because 4-byte hunks are
4542 always right adjusted/little endian. */
4545 /* If we have an even 4 byte hunk because the aggregate is a
4546 multiple of 4 bytes in size, then this goes in a GR reg right
4547 adjusted/little endian. */
4548 else if (byte_size - offset == 4)
4551 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4552 gen_rtx_REG (gr_mode, (basereg
4556 gr_size = GET_MODE_SIZE (gr_mode);
4558 if (gr_size == UNITS_PER_WORD
4559 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4561 else if (gr_size > UNITS_PER_WORD)
4562 int_regs += gr_size / UNITS_PER_WORD;
4564 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4567 /* On OpenVMS a variable argument is either in Rn or Fn.  */
4568 else if (TARGET_ABI_OPEN_VMS && named == 0)
4570 if (FLOAT_MODE_P (mode))
4571 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4573 return gen_rtx_REG (mode, basereg + cum->words);
4576 /* Integral types and aggregates go in general registers.  If we have run out
4577 of FR registers, then FP values must also go in general registers.  This can
4578 happen when we have an SFmode HFA.  */
4579 else if (mode == TFmode || mode == TCmode
4580 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4582 int byte_size = ((mode == BLKmode)
4583 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4584 if (BYTES_BIG_ENDIAN
4585 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4586 && byte_size < UNITS_PER_WORD
4589 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4590 gen_rtx_REG (DImode,
4591 (basereg + cum->words
4594 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4597 return gen_rtx_REG (mode, basereg + cum->words + offset);
4601 /* If there is a prototype, then FP values go in an FR register when
4602 named, and in a GR register when unnamed. */
4603 else if (cum->prototype)
4606 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4607 /* In big-endian mode, an anonymous SFmode value must be represented
4608 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4609 the value into the high half of the general register. */
4610 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4611 return gen_rtx_PARALLEL (mode,
4613 gen_rtx_EXPR_LIST (VOIDmode,
4614 gen_rtx_REG (DImode, basereg + cum->words + offset),
4617 return gen_rtx_REG (mode, basereg + cum->words + offset);
4619 /* If there is no prototype, then FP values go in both FR and GR
4620 registers.  */
4623 /* See comment above. */
4624 enum machine_mode inner_mode =
4625 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4627 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4628 gen_rtx_REG (mode, (FR_ARG_FIRST
4631 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4632 gen_rtx_REG (inner_mode,
4633 (basereg + cum->words
4637 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4641 /* Implement TARGET_FUNCTION_ARG target hook.  */
4644 ia64_function_arg (cumulative_args_t cum, enum machine_mode mode,
4645 const_tree type, bool named)
4647 return ia64_function_arg_1 (cum, mode, type, named, false);
4650 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4653 ia64_function_incoming_arg (cumulative_args_t cum,
4654 enum machine_mode mode,
4655 const_tree type, bool named)
4657 return ia64_function_arg_1 (cum, mode, type, named, true);
4660 /* Return number of bytes, at the beginning of the argument, that must be
4661 put in registers.  0 if the argument is entirely in registers or entirely
4662 on the stack.  */
4665 ia64_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
4666 tree type, bool named ATTRIBUTE_UNUSED)
4668 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4670 int words = ia64_function_arg_words (type, mode);
4671 int offset = ia64_function_arg_offset (cum, type, words);
4673 /* If all argument slots are used, then it must go on the stack. */
4674 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4677 /* It doesn't matter whether the argument goes in FR or GR regs. If
4678 it fits within the 8 argument slots, then it goes entirely in
4679 registers. If it extends past the last argument slot, then the rest
4680 goes on the stack. */
4682 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4685 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
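/* A worked example (hypothetical call site): with CUM->WORDS == 6, no
   alignment skip, and a 4-word aggregate, words 6 and 7 are passed in
   registers and the remaining two words go on the stack, so we return
   (8 - 6 - 0) * 8 == 16 bytes.  */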
4688 /* Return ivms_arg_type based on machine_mode. */
4690 static enum ivms_arg_type
4691 ia64_arg_type (enum machine_mode mode)
4704 /* Update CUM to point after this argument. This is patterned after
4705 ia64_function_arg. */
4708 ia64_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4709 const_tree type, bool named)
4711 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4712 int words = ia64_function_arg_words (type, mode);
4713 int offset = ia64_function_arg_offset (cum, type, words);
4714 enum machine_mode hfa_mode = VOIDmode;
4716 /* If all arg slots are already full, then there is nothing to do. */
4717 if (cum->words >= MAX_ARGUMENT_SLOTS)
4719 cum->words += words + offset;
4723 cum->atypes[cum->words] = ia64_arg_type (mode);
4724 cum->words += words + offset;
4726 /* Check for and handle homogeneous FP aggregates. */
4728 hfa_mode = hfa_element_mode (type, 0);
4730 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4731 and unprototyped hfas are passed specially. */
4732 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4734 int fp_regs = cum->fp_regs;
4735 /* This is the original value of cum->words + offset. */
4736 int int_regs = cum->words - words;
4737 int hfa_size = GET_MODE_SIZE (hfa_mode);
4741 /* If prototyped, pass it in FR regs then GR regs.
4742 If not prototyped, pass it in both FR and GR regs.
4744 If this is an SFmode aggregate, then it is possible to run out of
4745 FR regs while GR regs are still left. In that case, we pass the
4746 remaining part in the GR regs. */
4748 /* Fill the FP regs. We do this always. We stop if we reach the end
4749 of the argument, the last FP register, or the last argument slot. */
4751 byte_size = ((mode == BLKmode)
4752 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4753 args_byte_size = int_regs * UNITS_PER_WORD;
4755 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4756 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4759 args_byte_size += hfa_size;
4763 cum->fp_regs = fp_regs;
4766 /* On OpenVMS a variable argument is either in Rn or Fn.  */
4767 else if (TARGET_ABI_OPEN_VMS && named == 0)
4769 cum->int_regs = cum->words;
4770 cum->fp_regs = cum->words;
4773 /* Integral types and aggregates go in general registers.  So do TFmode FP
4774 values.  If we have run out of FR registers, then other FP values must also
4775 go in general registers.  This can happen when we have an SFmode HFA.  */
4776 else if (mode == TFmode || mode == TCmode
4777 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4778 cum->int_regs = cum->words;
4780 /* If there is a prototype, then FP values go in an FR register when
4781 named, and in a GR register when unnamed. */
4782 else if (cum->prototype)
4785 cum->int_regs = cum->words;
4787 /* ??? Complex types should not reach here. */
4788 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4790 /* If there is no prototype, then FP values go in both FR and GR
4794 /* ??? Complex types should not reach here. */
4795 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4796 cum->int_regs = cum->words;
4800 /* Arguments with alignment larger than 8 bytes start at the next even
4801 boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4802 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4805 ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4807 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4808 return PARM_BOUNDARY * 2;
4812 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4813 return PARM_BOUNDARY * 2;
4815 return PARM_BOUNDARY;
4818 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4819 return PARM_BOUNDARY * 2;
4821 return PARM_BOUNDARY;
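/* For example (illustrative): a TFmode argument on ILP32 HP-UX, or an
   aggregate with 16-byte alignment, is placed on a 128-bit boundary
   (PARM_BOUNDARY * 2); a plain DImode argument stays on the 64-bit
   PARM_BOUNDARY.  */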
4824 /* True if it is OK to do sibling call optimization for the specified
4825 call expression EXP. DECL will be the called function, or NULL if
4826 this is an indirect call. */
4828 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4830 /* We can't perform a sibcall if the current function has the syscall_linkage
4831 attribute.  */
4832 if (lookup_attribute ("syscall_linkage",
4833 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4836 /* We must always return with our current GP. This means we can
4837 only sibcall to functions defined in the current module unless
4838 TARGET_CONST_GP is set to true. */
4839 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4843 /* Implement va_arg. */
4846 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4849 /* Variable sized types are passed by reference. */
4850 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4852 tree ptrtype = build_pointer_type (type);
4853 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4854 return build_va_arg_indirect_ref (addr);
4857 /* Aggregate arguments with alignment larger than 8 bytes start at
4858 the next even boundary. Integer and floating point arguments
4859 do so if they are larger than 8 bytes, whether or not they are
4860 also aligned larger than 8 bytes. */
4861 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4862 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4864 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
4865 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4866 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
4867 gimplify_assign (unshare_expr (valist), t, pre_p);
4870 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
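/* The rounding above is the usual align-up idiom; with UNITS_PER_WORD
   == 8 it amounts to (illustrative):

     valist = (valist + 15) & -16;

   i.e. fold_build_pointer_plus_hwi adds 2*8 - 1 and the BIT_AND_EXPR
   masks with -2*8.  */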
4873 /* Return 1 if the function return value is returned in memory.  Return 0 if
4874 it is in a register.  */
4877 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4879 enum machine_mode mode;
4880 enum machine_mode hfa_mode;
4881 HOST_WIDE_INT byte_size;
4883 mode = TYPE_MODE (valtype);
4884 byte_size = GET_MODE_SIZE (mode);
4885 if (mode == BLKmode)
4887 byte_size = int_size_in_bytes (valtype);
4892 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4894 hfa_mode = hfa_element_mode (valtype, 0);
4895 if (hfa_mode != VOIDmode)
4897 int hfa_size = GET_MODE_SIZE (hfa_mode);
4899 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4904 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4910 /* Return rtx for register that holds the function return value. */
4913 ia64_function_value (const_tree valtype,
4914 const_tree fn_decl_or_type,
4915 bool outgoing ATTRIBUTE_UNUSED)
4917 enum machine_mode mode;
4918 enum machine_mode hfa_mode;
4920 const_tree func = fn_decl_or_type;
4923 && !DECL_P (fn_decl_or_type))
4926 mode = TYPE_MODE (valtype);
4927 hfa_mode = hfa_element_mode (valtype, 0);
4929 if (hfa_mode != VOIDmode)
4937 hfa_size = GET_MODE_SIZE (hfa_mode);
4938 byte_size = ((mode == BLKmode)
4939 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4941 for (i = 0; offset < byte_size; i++)
4943 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4944 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4948 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4950 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4951 return gen_rtx_REG (mode, FR_ARG_FIRST);
4954 bool need_parallel = false;
4956 /* In big-endian mode, we need to manage the layout of aggregates
4957 in the registers so that we get the bits properly aligned in
4958 the highpart of the registers. */
4959 if (BYTES_BIG_ENDIAN
4960 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4961 need_parallel = true;
4963 /* Something like struct S { long double x; char a[0] } is not an
4964 HFA structure, and therefore doesn't go in fp registers. But
4965 the middle-end will give it XFmode anyway, and XFmode values
4966 don't normally fit in integer registers. So we need to smuggle
4967 the value inside a parallel. */
4968 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4969 need_parallel = true;
4979 bytesize = int_size_in_bytes (valtype);
4980 /* An empty PARALLEL is invalid here, but the return value
4981 doesn't matter for empty structs. */
4983 return gen_rtx_REG (mode, GR_RET_FIRST);
4984 for (i = 0; offset < bytesize; i++)
4986 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4987 gen_rtx_REG (DImode,
4990 offset += UNITS_PER_WORD;
4992 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4995 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4996 func ? TREE_TYPE (func) : NULL_TREE,
4999 return gen_rtx_REG (mode, GR_RET_FIRST);
5003 /* Worker function for TARGET_LIBCALL_VALUE. */
5006 ia64_libcall_value (enum machine_mode mode,
5007 const_rtx fun ATTRIBUTE_UNUSED)
5009 return gen_rtx_REG (mode,
5010 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5011 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5012 && (mode) != TFmode)
5013 ? FR_RET_FIRST : GR_RET_FIRST));
5016 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5019 ia64_function_value_regno_p (const unsigned int regno)
5021 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5022 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5025 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5026 We need to emit DTP-relative relocations. */
5029 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5031 gcc_assert (size == 4 || size == 8);
5033 fputs ("\tdata4.ua\t@dtprel(", file);
5035 fputs ("\tdata8.ua\t@dtprel(", file);
5036 output_addr_const (file, x);
5040 /* Print a memory address as an operand to reference that memory location. */
5042 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5043 also call this from ia64_print_operand for memory addresses. */
5046 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5047 rtx address ATTRIBUTE_UNUSED)
5051 /* Print an operand to an assembler instruction.
5052 C Swap and print a comparison operator.
5053 D Print an FP comparison operator.
5054 E Print 32 - constant, for SImode shifts as extract.
5055 e Print 64 - constant, for DImode rotates.
5056 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5057 a floating point register emitted normally.
5058 G A floating point constant.
5059 I Invert a predicate register by adding 1.
5060 J Select the proper predicate register for a condition.
5061 j Select the inverse predicate register for a condition.
5062 O Append .acq for volatile load.
5063 P Postincrement of a MEM.
5064 Q Append .rel for volatile store.
5065 R Print .s .d or nothing for a single, double or no truncation.
5066 S Shift amount for shladd instruction.
5067 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5068 for Intel assembler.
5069 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5070 for Intel assembler.
5071 X A pair of floating point registers.
5072 r Print register name, or constant 0 as r0.  HP compatibility for
5073 Linux kernel.
5074 v Print vector constant value as an 8-byte integer value. */
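/* Illustration of the 'P' code (hypothetical operand): given the
   post-increment memory operand (mem:DI (post_inc:DI (reg r32))),
   printing the operand itself yields "[r32]" and the 'P' code appends
   ", 8" (GET_MODE_SIZE of DImode), so a load template can produce the
   autoincrement form

     ld8 r14 = [r32], 8

   in the assembly output.  */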
5077 ia64_print_operand (FILE * file, rtx x, int code)
5084 /* Handled below. */
5089 enum rtx_code c = swap_condition (GET_CODE (x));
5090 fputs (GET_RTX_NAME (c), file);
5095 switch (GET_CODE (x))
5119 str = GET_RTX_NAME (GET_CODE (x));
5126 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5130 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5134 if (x == CONST0_RTX (GET_MODE (x)))
5135 str = reg_names [FR_REG (0)];
5136 else if (x == CONST1_RTX (GET_MODE (x)))
5137 str = reg_names [FR_REG (1)];
5140 gcc_assert (GET_CODE (x) == REG);
5141 str = reg_names [REGNO (x)];
5150 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5151 real_to_target (val, &rv, GET_MODE (x));
5152 if (GET_MODE (x) == SFmode)
5153 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5154 else if (GET_MODE (x) == DFmode)
5155 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5157 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5160 output_operand_lossage ("invalid %%G mode");
5165 fputs (reg_names [REGNO (x) + 1], file);
5171 unsigned int regno = REGNO (XEXP (x, 0));
5172 if (GET_CODE (x) == EQ)
5176 fputs (reg_names [regno], file);
5181 if (MEM_VOLATILE_P (x))
5182 fputs(".acq", file);
5187 HOST_WIDE_INT value;
5189 switch (GET_CODE (XEXP (x, 0)))
5195 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5196 if (GET_CODE (x) == CONST_INT)
5200 gcc_assert (GET_CODE (x) == REG);
5201 fprintf (file, ", %s", reg_names[REGNO (x)]);
5207 value = GET_MODE_SIZE (GET_MODE (x));
5211 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5215 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5220 if (MEM_VOLATILE_P (x))
5221 fputs(".rel", file);
5225 if (x == CONST0_RTX (GET_MODE (x)))
5227 else if (x == CONST1_RTX (GET_MODE (x)))
5229 else if (x == CONST2_RTX (GET_MODE (x)))
5232 output_operand_lossage ("invalid %%R value");
5236 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5240 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5242 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5248 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5250 const char *prefix = "0x";
5251 if (INTVAL (x) & 0x80000000)
5253 fprintf (file, "0xffffffff");
5256 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5263 unsigned int regno = REGNO (x);
5264 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5269 /* If this operand is the constant zero, write it as register zero.
5270 Any register, zero, or CONST_INT value is OK here. */
5271 if (GET_CODE (x) == REG)
5272 fputs (reg_names[REGNO (x)], file);
5273 else if (x == CONST0_RTX (GET_MODE (x)))
5275 else if (GET_CODE (x) == CONST_INT)
5276 output_addr_const (file, x);
5278 output_operand_lossage ("invalid %%r value");
5282 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5283 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5290 /* For conditional branches, returns or calls, substitute
5291 sptk, dptk, dpnt, or spnt for %s. */
5292 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5295 int pred_val = INTVAL (XEXP (x, 0));
5297 /* Guess top and bottom 10% statically predicted. */
5298 if (pred_val < REG_BR_PROB_BASE / 50
5299 && br_prob_note_reliable_p (x))
5301 else if (pred_val < REG_BR_PROB_BASE / 2)
5303 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5304 || !br_prob_note_reliable_p (x))
5309 else if (GET_CODE (current_output_insn) == CALL_INSN)
5314 fputs (which, file);
5319 x = current_insn_predicate;
5322 unsigned int regno = REGNO (XEXP (x, 0));
5323 if (GET_CODE (x) == EQ)
5325 fprintf (file, "(%s) ", reg_names [regno]);
5330 output_operand_lossage ("ia64_print_operand: unknown code");
5334 switch (GET_CODE (x))
5336 /* This happens for the spill/restore instructions. */
5341 /* ... fall through ... */
5344 fputs (reg_names [REGNO (x)], file);
5349 rtx addr = XEXP (x, 0);
5350 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5351 addr = XEXP (addr, 0);
5352 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5357 output_addr_const (file, x);
5364 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5367 ia64_print_operand_punct_valid_p (unsigned char code)
5369 return (code == '+' || code == ',');
5372 /* Compute a (partial) cost for rtx X. Return true if the complete
5373 cost has been computed, and false if subexpressions should be
5374 scanned. In either case, *TOTAL contains the cost result. */
5375 /* ??? This is incomplete. */
5378 ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5379 int *total, bool speed ATTRIBUTE_UNUSED)
5387 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5390 if (satisfies_constraint_I (x))
5392 else if (satisfies_constraint_J (x))
5395 *total = COSTS_N_INSNS (1);
5398 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5401 *total = COSTS_N_INSNS (1);
5406 *total = COSTS_N_INSNS (1);
5412 *total = COSTS_N_INSNS (3);
5416 *total = COSTS_N_INSNS (4);
5420 /* For multiplies wider than HImode, we have to go to the FPU,
5421 which normally involves copies. Plus there's the latency
5422 of the multiply itself, and the latency of the instructions to
5423 transfer integer regs to FP regs. */
5424 if (FLOAT_MODE_P (GET_MODE (x)))
5425 *total = COSTS_N_INSNS (4);
5426 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5427 *total = COSTS_N_INSNS (10);
5429 *total = COSTS_N_INSNS (2);
5434 if (FLOAT_MODE_P (GET_MODE (x)))
5436 *total = COSTS_N_INSNS (4);
5444 *total = COSTS_N_INSNS (1);
5451 /* We make divide expensive, so that divide-by-constant will be
5452 optimized to a multiply. */
5453 *total = COSTS_N_INSNS (60);
5461 /* Calculate the cost of moving data from a register in class FROM to
5462 one in class TO, using MODE. */
5465 ia64_register_move_cost (enum machine_mode mode, reg_class_t from,
5468 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5469 if (to == ADDL_REGS)
5471 if (from == ADDL_REGS)
5474 /* All costs are symmetric, so reduce cases by putting the
5475 lower number class as the destination. */
5478 reg_class_t tmp = to;
5479 to = from, from = tmp;
5482 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5483 so that we get secondary memory reloads. Between FR_REGS,
5484 we have to make this at least as expensive as memory_move_cost
5485 to avoid spectacularly poor register class preferencing. */
5486 if (mode == XFmode || mode == RFmode)
5488 if (to != GR_REGS || from != GR_REGS)
5489 return memory_move_cost (mode, to, false);
5497 /* Moving between PR registers takes two insns. */
5498 if (from == PR_REGS)
5500 /* Moving between PR and anything but GR is impossible. */
5501 if (from != GR_REGS)
5502 return memory_move_cost (mode, to, false);
5506 /* Moving between BR and anything but GR is impossible. */
5507 if (from != GR_REGS && from != GR_AND_BR_REGS)
5508 return memory_move_cost (mode, to, false);
5513 /* Moving between AR and anything but GR is impossible. */
5514 if (from != GR_REGS)
5515 return memory_move_cost (mode, to, false);
5521 case GR_AND_FR_REGS:
5522 case GR_AND_BR_REGS:
5533 /* Calculate the cost of moving data of MODE from a register to or from
5534 memory.  */
5537 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5539 bool in ATTRIBUTE_UNUSED)
5541 if (rclass == GENERAL_REGS
5542 || rclass == FR_REGS
5543 || rclass == FP_REGS
5544 || rclass == GR_AND_FR_REGS)
5550 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5551 on RCLASS to use when copying X into that class. */
5554 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5560 /* Don't allow volatile mem reloads into floating point registers.
5561 This is defined to force reload to choose the r/m case instead
5562 of the f/f case when reloading (set (reg fX) (mem/v)). */
5563 if (MEM_P (x) && MEM_VOLATILE_P (x))
5566 /* Force all unrecognized constants into the constant pool. */
5584 /* This function returns the register class required for a secondary
5585 register when copying between one of the registers in RCLASS, and X,
5586 using MODE.  A return value of NO_REGS means that no secondary register
5587 is required.  */
5590 ia64_secondary_reload_class (enum reg_class rclass,
5591 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5595 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5596 regno = true_regnum (x);
5603 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5604 interaction. We end up with two pseudos with overlapping lifetimes
5605 both of which are equiv to the same constant, and both which need
5606 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5607 changes depending on the path length, which means the qty_first_reg
5608 check in make_regs_eqv can give different answers at different times.
5609 At some point I'll probably need a reload_indi pattern to handle
5610 this.
5612 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5613 wound up with an FP register from GR_AND_FR_REGS.  Extend that to all
5614 non-general registers for good measure. */
5615 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5618 /* This is needed if a pseudo used as a call_operand gets spilled to a
5619 stack slot.  */
5620 if (GET_CODE (x) == MEM)
5626 /* Need to go through general registers to get to other class regs. */
5627 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5630 /* This can happen when a paradoxical subreg is an operand to the
5631 muldi3 pattern.  */
5632 /* ??? This shouldn't be necessary after instruction scheduling is
5633 enabled, because paradoxical subregs are not accepted by
5634 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5635 stop the paradoxical subreg stupidity in the *_operand functions
5636 in recog.c.  */
5637 if (GET_CODE (x) == MEM
5638 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5639 || GET_MODE (x) == QImode))
5642 /* This can happen because of the ior/and/etc patterns that accept FP
5643 registers as operands. If the third operand is a constant, then it
5644 needs to be reloaded into an FP register.  */
5645 if (GET_CODE (x) == CONST_INT)
5648 /* This can happen because of register elimination in a muldi3 insn.
5649 E.g. `26107 * (unsigned long)&u'. */
5650 if (GET_CODE (x) == PLUS)
5655 /* ??? This happens if we cse/gcse a BImode value across a call,
5656 and the function has a nonlocal goto. This is because global
5657 does not allocate call crossing pseudos to hard registers when
5658 crtl->has_nonlocal_goto is true. This is relatively
5659 common for C++ programs that use exceptions. To reproduce,
5660 return NO_REGS and compile libstdc++. */
5661 if (GET_CODE (x) == MEM)
5664 /* This can happen when we take a BImode subreg of a DImode value,
5665 and that DImode value winds up in some non-GR register. */
5666 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5678 /* Implement targetm.unspec_may_trap_p hook. */
5680 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5682 if (GET_CODE (x) == UNSPEC)
5684 switch (XINT (x, 1))
5690 case UNSPEC_CHKACLR:
5692 /* These unspecs are just wrappers. */
5693 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5697 return default_unspec_may_trap_p (x, flags);
5701 /* Parse the -mfixed-range= option string. */
5704 fix_range (const char *const_str)
5707 char *str, *dash, *comma;
5709 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5710 REG2 are either register names or register numbers. The effect
5711 of this option is to mark the registers in the range from REG1 to
5712 REG2 as ``fixed'' so they won't be used by the compiler. This is
5713 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
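/* For example (illustrative command line):

     gcc -mfixed-range=f32-f127 ...

   marks f32 through f127 as fixed, the kernel-mode use case mentioned
   above; several ranges may be given, comma-separated, as in
   -mfixed-range=f12-f15,f32-f127.  */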
5715 i = strlen (const_str);
5716 str = (char *) alloca (i + 1);
5717 memcpy (str, const_str, i + 1);
5721 dash = strchr (str, '-');
5724 warning (0, "value of -mfixed-range must have form REG1-REG2");
5729 comma = strchr (dash + 1, ',');
5733 first = decode_reg_name (str);
5736 warning (0, "unknown register name: %s", str);
5740 last = decode_reg_name (dash + 1);
5743 warning (0, "unknown register name: %s", dash + 1);
5751 warning (0, "%s-%s is an empty range", str, dash + 1);
5755 for (i = first; i <= last; ++i)
5756 fixed_regs[i] = call_used_regs[i] = 1;
5766 /* Implement TARGET_OPTION_OVERRIDE. */
5769 ia64_option_override (void)
5772 cl_deferred_option *opt;
5773 VEC(cl_deferred_option,heap) *vec
5774 = (VEC(cl_deferred_option,heap) *) ia64_deferred_options;
5776 FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
5778 switch (opt->opt_index)
5780 case OPT_mfixed_range_:
5781 fix_range (opt->arg);
5789 if (TARGET_AUTO_PIC)
5790 target_flags |= MASK_CONST_GP;
5792 /* Numerous experiments show that IRA-based loop pressure
5793 calculation works better for RTL loop invariant motion on targets
5794 with enough (>= 32) registers. It is an expensive optimization.
5795 So it is on only for peak performance. */
5797 flag_ira_loop_pressure = 1;
5800 ia64_section_threshold = (global_options_set.x_g_switch_value
5802 : IA64_DEFAULT_GVALUE);
5804 init_machine_status = ia64_init_machine_status;
5806 if (align_functions <= 0)
5807 align_functions = 64;
5808 if (align_loops <= 0)
5810 if (TARGET_ABI_OPEN_VMS)
5813 ia64_override_options_after_change();
5816 /* Implement targetm.override_options_after_change. */
5819 ia64_override_options_after_change (void)
5822 && !global_options_set.x_flag_selective_scheduling
5823 && !global_options_set.x_flag_selective_scheduling2)
5825 flag_selective_scheduling2 = 1;
5826 flag_sel_sched_pipelining = 1;
5828 if (mflag_sched_control_spec == 2)
5830 /* Control speculation is on by default for the selective scheduler,
5831 but not for the Haifa scheduler. */
5832 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5834 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5836 /* FIXME: remove this once we implement breaking autoinc insns as
5837 a transformation.  */
5838 flag_auto_inc_dec = 0;
5842 /* Initialize the record of emitted frame related registers. */
5844 void ia64_init_expanders (void)
5846 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5849 static struct machine_function *
5850 ia64_init_machine_status (void)
5852 return ggc_alloc_cleared_machine_function ();
5855 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5856 static enum attr_type ia64_safe_type (rtx);
5858 static enum attr_itanium_class
5859 ia64_safe_itanium_class (rtx insn)
5861 if (recog_memoized (insn) >= 0)
5862 return get_attr_itanium_class (insn);
5863 else if (DEBUG_INSN_P (insn))
5864 return ITANIUM_CLASS_IGNORE;
5866 return ITANIUM_CLASS_UNKNOWN;
5869 static enum attr_type
5870 ia64_safe_type (rtx insn)
5872 if (recog_memoized (insn) >= 0)
5873 return get_attr_type (insn);
5875 return TYPE_UNKNOWN;
5878 /* The following collection of routines emits instruction group stop bits as
5879 necessary to avoid dependencies. */
5881 /* Need to track some additional registers as far as serialization is
5882 concerned so we can properly handle br.call and br.ret. We could
5883 make these registers visible to gcc, but since these registers are
5884 never explicitly used in gcc generated code, it seems wasteful to
5885 do so (plus it would make the call and return patterns needlessly
5886 complex).  */
5887 #define REG_RP (BR_REG (0))
5888 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5889 /* This is used for volatile asms which may require a stop bit immediately
5890 before and after them. */
5891 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5892 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5893 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5895 /* For each register, we keep track of how it has been written in the
5896 current instruction group.
5898 If a register is written unconditionally (no qualifying predicate),
5899 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5901 If a register is written if its qualifying predicate P is true, we
5902 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5903 may be written again by the complement of P (P^1) and when this happens,
5904 WRITE_COUNT gets set to 2.
5906 The result of this is that whenever an insn attempts to write a register
5907 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5909 If a predicate register is written by a floating-point insn, we set
5910 WRITTEN_BY_FP to true.
5912 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5913 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
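/* A worked example (hypothetical insns, p6/p7 assumed complementary):
   within one instruction group

     (p6) mov r4 = r5    // r4: WRITE_COUNT = 1, FIRST_PRED = p6
     (p7) mov r4 = r6    // complement of p6: WRITE_COUNT becomes 2
          mov r4 = r7    // needs a stop bit (";;") emitted first

   The first two writes may share a group because at most one of them
   executes; the third write trips the WRITE_COUNT == 2 rule above.  */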
5915 #if GCC_VERSION >= 4000
5916 #define RWS_FIELD_TYPE __extension__ unsigned short
5918 #define RWS_FIELD_TYPE unsigned int
5920 struct reg_write_state
5922 RWS_FIELD_TYPE write_count : 2;
5923 RWS_FIELD_TYPE first_pred : 10;
5924 RWS_FIELD_TYPE written_by_fp : 1;
5925 RWS_FIELD_TYPE written_by_and : 1;
5926 RWS_FIELD_TYPE written_by_or : 1;
5929 /* Cumulative info for the current instruction group. */
5930 struct reg_write_state rws_sum[NUM_REGS];
5931 #ifdef ENABLE_CHECKING
5932 /* Bitmap recording whether a register has been written in the current insn.  */
5933 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5934 / HOST_BITS_PER_WIDEST_FAST_INT];
5937 rws_insn_set (int regno)
5939 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5940 SET_HARD_REG_BIT (rws_insn, regno);
5944 rws_insn_test (int regno)
5946 return TEST_HARD_REG_BIT (rws_insn, regno);
5949 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5950 unsigned char rws_insn[2];
5953 rws_insn_set (int regno)
5955 if (regno == REG_AR_CFM)
5957 else if (regno == REG_VOLATILE)
5962 rws_insn_test (int regno)
5964 if (regno == REG_AR_CFM)
5966 if (regno == REG_VOLATILE)
5972 /* Indicates whether this is the first instruction after a stop bit,
5973 in which case we don't need another stop bit. Without this,
5974 ia64_variable_issue will die when scheduling an alloc. */
5975 static int first_instruction;
5977 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5978 RTL for one instruction. */
5981 unsigned int is_write : 1; /* Is register being written? */
5982 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5983 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5984 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5985 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5986 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5989 static void rws_update (int, struct reg_flags, int);
5990 static int rws_access_regno (int, struct reg_flags, int);
5991 static int rws_access_reg (rtx, struct reg_flags, int);
5992 static void update_set_flags (rtx, struct reg_flags *);
5993 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5994 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5995 static void init_insn_group_barriers (void);
5996 static int group_barrier_needed (rtx);
5997 static int safe_group_barrier_needed (rtx);
5998 static int in_safe_group_barrier;
6000 /* Update *RWS for REGNO, which is being written by the current instruction,
6001 with predicate PRED, and associated register flags in FLAGS. */
6004 rws_update (int regno, struct reg_flags flags, int pred)
6007 rws_sum[regno].write_count++;
6009 rws_sum[regno].write_count = 2;
6010 rws_sum[regno].written_by_fp |= flags.is_fp;
6011 /* ??? Not tracking and/or across differing predicates. */
6012 rws_sum[regno].written_by_and = flags.is_and;
6013 rws_sum[regno].written_by_or = flags.is_or;
6014 rws_sum[regno].first_pred = pred;
6017 /* Handle an access to register REGNO of type FLAGS using predicate register
6018 PRED. Update rws_sum array. Return 1 if this access creates
6019 a dependency with an earlier instruction in the same group. */
6022 rws_access_regno (int regno, struct reg_flags flags, int pred)
6024 int need_barrier = 0;
6026 gcc_assert (regno < NUM_REGS);
6028 if (! PR_REGNO_P (regno))
6029 flags.is_and = flags.is_or = 0;
6035 rws_insn_set (regno);
6036 write_count = rws_sum[regno].write_count;
6038 switch (write_count)
6041 /* The register has not been written yet. */
6042 if (!in_safe_group_barrier)
6043 rws_update (regno, flags, pred);
6047 /* The register has been written via a predicate. Treat
6048 it like an unconditional write and do not try to check
6049 for complementary pred reg in earlier write. */
6050 if (flags.is_and && rws_sum[regno].written_by_and)
6052 else if (flags.is_or && rws_sum[regno].written_by_or)
6056 if (!in_safe_group_barrier)
6057 rws_update (regno, flags, pred);
6061 /* The register has been unconditionally written already.  We
6062 need a barrier.  */
6063 if (flags.is_and && rws_sum[regno].written_by_and)
6065 else if (flags.is_or && rws_sum[regno].written_by_or)
6069 if (!in_safe_group_barrier)
6071 rws_sum[regno].written_by_and = flags.is_and;
6072 rws_sum[regno].written_by_or = flags.is_or;
6082 if (flags.is_branch)
6084 /* Branches have several RAW exceptions that allow us to avoid
6085 barriers.  */
6087 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6088 /* RAW dependencies on branch regs are permissible as long
6089 as the writer is a non-branch instruction. Since we
6090 never generate code that uses a branch register written
6091 by a branch instruction, handling this case is
6092 easy.  */
6093 return 0;
6095 if (REGNO_REG_CLASS (regno) == PR_REGS
6096 && ! rws_sum[regno].written_by_fp)
6097 /* The predicates of a branch are available within the
6098 same insn group as long as the predicate was written by
6099 something other than a floating-point instruction.  */
6100 return 0;
6103 if (flags.is_and && rws_sum[regno].written_by_and)
6105 if (flags.is_or && rws_sum[regno].written_by_or)
6108 switch (rws_sum[regno].write_count)
6111 /* The register has not been written yet. */
6115 /* The register has been written via a predicate, assume we
6116 need a barrier (don't check for complementary regs). */
6121 /* The register has been unconditionally written already.  We
6122 need a barrier.  */
6131 return need_barrier;
6135 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6137 int regno = REGNO (reg);
6138 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6141 return rws_access_regno (regno, flags, pred);
6144 int need_barrier = 0;
6146 need_barrier |= rws_access_regno (regno + n, flags, pred);
6147 return need_barrier;
6151 /* Examine X, which is a SET rtx, and update the register flags stored
6152 in *PFLAGS.  */
6155 update_set_flags (rtx x, struct reg_flags *pflags)
6157 rtx src = SET_SRC (x);
6159 switch (GET_CODE (src))
6165 /* There are four cases here:
6166 (1) The destination is (pc), in which case this is a branch,
6167 nothing here applies.
6168 (2) The destination is ar.lc, in which case this is a
6169 doloop_end_internal.
6170 (3) The destination is an fp register, in which case this is
6171 an fselect instruction.
6172 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6173 this is a check load.
6174 In all cases, nothing we do in this function applies. */
6178 if (COMPARISON_P (src)
6179 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6180 /* Set pflags->is_fp to 1 so that we know we're dealing
6181 with a floating point comparison when processing the
6182 destination of the SET. */
6185 /* Discover if this is a parallel comparison. We only handle
6186 and.orcm and or.andcm at present, since we must retain a
6187 strict inverse on the predicate pair. */
6188 else if (GET_CODE (src) == AND)
6190 else if (GET_CODE (src) == IOR)
6197 /* Subroutine of rtx_needs_barrier; this function determines whether the
6198 source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6199 are as in rtx_needs_barrier.  */
6203 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6205 int need_barrier = 0;
6207 rtx src = SET_SRC (x);
6209 if (GET_CODE (src) == CALL)
6210 /* We don't need to worry about the result registers that
6211 get written by a subroutine call.  */
6212 return rtx_needs_barrier (src, flags, pred);
6213 else if (SET_DEST (x) == pc_rtx)
6215 /* X is a conditional branch. */
6216 /* ??? This seems redundant, as the caller sets this bit for
6217 us already.  */
6218 if (!ia64_spec_check_src_p (src))
6219 flags.is_branch = 1;
6220 return rtx_needs_barrier (src, flags, pred);
6223 if (ia64_spec_check_src_p (src))
6224 /* Avoid checking one register twice (in condition
6225 and in 'then' section) for ldc pattern. */
6227 gcc_assert (REG_P (XEXP (src, 2)));
6228 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6230 /* We process MEM below. */
6231 src = XEXP (src, 1);
6234 need_barrier |= rtx_needs_barrier (src, flags, pred);
6237 if (GET_CODE (dst) == ZERO_EXTRACT)
6239 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6240 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6242 return need_barrier;
6245 /* Handle an access to rtx X of type FLAGS using predicate register
6246 PRED. Return 1 if this access creates a dependency with an earlier
6247 instruction in the same group. */
6250 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6253 int is_complemented = 0;
6254 int need_barrier = 0;
6255 const char *format_ptr;
6256 struct reg_flags new_flags;
6264 switch (GET_CODE (x))
6267 update_set_flags (x, &new_flags);
6268 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6269 if (GET_CODE (SET_SRC (x)) != CALL)
6271 new_flags.is_write = 1;
6272 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6277 new_flags.is_write = 0;
6278 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6280 /* Avoid multiple register writes, in case this is a pattern with
6281 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6282 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6284 new_flags.is_write = 1;
6285 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6286 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6287 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6292 /* X is a predicated instruction. */
6294 cond = COND_EXEC_TEST (x);
6296 need_barrier = rtx_needs_barrier (cond, flags, 0);
6298 if (GET_CODE (cond) == EQ)
6299 is_complemented = 1;
6300 cond = XEXP (cond, 0);
6301 gcc_assert (GET_CODE (cond) == REG
6302 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6303 pred = REGNO (cond);
6304 if (is_complemented)
6307 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6308 return need_barrier;
6312 /* Clobber & use are for earlier compiler phases only.  */
6317 /* We always emit stop bits for traditional asms. We emit stop bits
6318 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6319 if (GET_CODE (x) != ASM_OPERANDS
6320 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6322 /* Avoid writing the register multiple times if we have multiple
6323 asm outputs. This avoids a failure in rws_access_reg. */
6324 if (! rws_insn_test (REG_VOLATILE))
6326 new_flags.is_write = 1;
6327 rws_access_regno (REG_VOLATILE, new_flags, pred);
6332 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6333 We cannot just fall through here since then we would be confused
6334 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
6335 traditional asms, unlike their normal usage.  */
6337 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6338 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6343 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6345 rtx pat = XVECEXP (x, 0, i);
6346 switch (GET_CODE (pat))
6349 update_set_flags (pat, &new_flags);
6350 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6356 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6360 if (REG_P (XEXP (pat, 0))
6361 && extract_asm_operands (x) != NULL_RTX
6362 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6364 new_flags.is_write = 1;
6365 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6378 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6380 rtx pat = XVECEXP (x, 0, i);
6381 if (GET_CODE (pat) == SET)
6383 if (GET_CODE (SET_SRC (pat)) != CALL)
6385 new_flags.is_write = 1;
6386 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6390 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6391 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6396 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6399 if (REGNO (x) == AR_UNAT_REGNUM)
6401 for (i = 0; i < 64; ++i)
6402 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
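/* Editor's note: an access to ar.unat is modelled as touching all 64
   tracked UNAT bits, matching the UNSPEC_GR_SPILL/UNSPEC_GR_RESTORE
   case below, which accesses the individual AR_UNAT_BIT_0 + bit
   registers. */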
6405 need_barrier = rws_access_reg (x, flags, pred);
6409 /* Find the regs used in memory address computation. */
6410 new_flags.is_write = 0;
6411 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6414 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6415 case SYMBOL_REF: case LABEL_REF: case CONST:
6418 /* Operators with side-effects. */
6419 case POST_INC: case POST_DEC:
6420 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6422 new_flags.is_write = 0;
6423 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6424 new_flags.is_write = 1;
6425 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6429 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6431 new_flags.is_write = 0;
6432 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6433 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6434 new_flags.is_write = 1;
6435 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6438 /* Handle common unary and binary ops for efficiency. */
6439 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6440 case MOD: case UDIV: case UMOD: case AND: case IOR:
6441 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6442 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6443 case NE: case EQ: case GE: case GT: case LE:
6444 case LT: case GEU: case GTU: case LEU: case LTU:
6445 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6446 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6449 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6450 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6451 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6452 case SQRT: case FFS: case POPCOUNT:
6453 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6457 /* VEC_SELECT's second argument is a PARALLEL with integers that
6458 describe the elements selected. On ia64, those integers are
6459 always constants. Avoid walking the PARALLEL so that we don't
6460 get confused with "normal" parallels and then die. */
6461 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6465 switch (XINT (x, 1))
6467 case UNSPEC_LTOFF_DTPMOD:
6468 case UNSPEC_LTOFF_DTPREL:
6470 case UNSPEC_LTOFF_TPREL:
6472 case UNSPEC_PRED_REL_MUTEX:
6473 case UNSPEC_PIC_CALL:
6475 case UNSPEC_FETCHADD_ACQ:
6476 case UNSPEC_FETCHADD_REL:
6477 case UNSPEC_BSP_VALUE:
6478 case UNSPEC_FLUSHRS:
6479 case UNSPEC_BUNDLE_SELECTOR:
6482 case UNSPEC_GR_SPILL:
6483 case UNSPEC_GR_RESTORE:
6485 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6486 HOST_WIDE_INT bit = (offset >> 3) & 63;
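/* Editor's note: offset >> 3 is the 8-byte spill slot number; taking
   it modulo 64 (& 63) selects which of the 64 UNAT bits guards that
   slot. */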
6488 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6489 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6490 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6495 case UNSPEC_FR_SPILL:
6496 case UNSPEC_FR_RESTORE:
6497 case UNSPEC_GETF_EXP:
6498 case UNSPEC_SETF_EXP:
6500 case UNSPEC_FR_SQRT_RECIP_APPROX:
6501 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6506 case UNSPEC_CHKACLR:
6508 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6511 case UNSPEC_FR_RECIP_APPROX:
6513 case UNSPEC_COPYSIGN:
6514 case UNSPEC_FR_RECIP_APPROX_RES:
6515 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6516 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6519 case UNSPEC_CMPXCHG_ACQ:
6520 case UNSPEC_CMPXCHG_REL:
6521 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6522 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6530 case UNSPEC_VOLATILE:
6531 switch (XINT (x, 1))
6534 /* Alloc must always be the first instruction of a group.
6535 We force this by always returning true. */
6536 /* ??? We might get better scheduling if we explicitly check for
6537 input/local/output register dependencies, and modify the
6538 scheduler so that alloc is always reordered to the start of
6539 the current group. We could then eliminate all of the
6540 first_instruction code. */
6541 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6543 new_flags.is_write = 1;
6544 rws_access_regno (REG_AR_CFM, new_flags, pred);
6547 case UNSPECV_SET_BSP:
6551 case UNSPECV_BLOCKAGE:
6552 case UNSPECV_INSN_GROUP_BARRIER:
6554 case UNSPECV_PSAC_ALL:
6555 case UNSPECV_PSAC_NORMAL:
6564 new_flags.is_write = 0;
6565 need_barrier = rws_access_regno (REG_RP, flags, pred);
6566 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6568 new_flags.is_write = 1;
6569 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6570 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6574 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6575 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6576 switch (format_ptr[i])
6578 case '0': /* unused field */
6579 case 'i': /* integer */
6580 case 'n': /* note */
6581 case 'w': /* wide integer */
6582 case 's': /* pointer to string */
6583 case 'S': /* optional pointer to string */
6587 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6592 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6593 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6602 return need_barrier;
6605 /* Clear out the state for group_barrier_needed at the start of a
6606 sequence of insns. */
6609 init_insn_group_barriers (void)
6611 memset (rws_sum, 0, sizeof (rws_sum));
6612 first_instruction = 1;
6615 /* Given the current state, determine whether a group barrier (a stop bit) is
6616 necessary before INSN. Return nonzero if so. This modifies the state to
6617 include the effects of INSN as a side-effect. */
6620 group_barrier_needed (rtx insn)
6623 int need_barrier = 0;
6624 struct reg_flags flags;
6626 memset (&flags, 0, sizeof (flags));
6627 switch (GET_CODE (insn))
6634 /* A barrier doesn't imply an instruction group boundary. */
6638 memset (rws_insn, 0, sizeof (rws_insn));
6642 flags.is_branch = 1;
6643 flags.is_sibcall = SIBLING_CALL_P (insn);
6644 memset (rws_insn, 0, sizeof (rws_insn));
6646 /* Don't bundle a call following another call. */
6647 if ((pat = prev_active_insn (insn))
6648 && GET_CODE (pat) == CALL_INSN)
6654 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6658 if (!ia64_spec_check_p (insn))
6659 flags.is_branch = 1;
6661 /* Don't bundle a jump following a call. */
6662 if ((pat = prev_active_insn (insn))
6663 && GET_CODE (pat) == CALL_INSN)
6671 if (GET_CODE (PATTERN (insn)) == USE
6672 || GET_CODE (PATTERN (insn)) == CLOBBER)
6673 /* Don't care about USE and CLOBBER "insns"---those are used to
6674 indicate to the optimizer that it shouldn't get rid of
6675 certain operations. */
6678 pat = PATTERN (insn);
6680 /* Ug. Hack hacks hacked elsewhere. */
6681 switch (recog_memoized (insn))
6683 /* We play dependency tricks with the epilogue in order
6684 to get proper schedules. Undo this for dv analysis. */
6685 case CODE_FOR_epilogue_deallocate_stack:
6686 case CODE_FOR_prologue_allocate_stack:
6687 pat = XVECEXP (pat, 0, 0);
6690 /* The pattern we use for br.cloop confuses the code above.
6691 The second element of the vector is representative. */
6692 case CODE_FOR_doloop_end_internal:
6693 pat = XVECEXP (pat, 0, 1);
6696 /* Doesn't generate code. */
6697 case CODE_FOR_pred_rel_mutex:
6698 case CODE_FOR_prologue_use:
6705 memset (rws_insn, 0, sizeof (rws_insn));
6706 need_barrier = rtx_needs_barrier (pat, flags, 0);
6708 /* Check to see if the previous instruction was a volatile
6711 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6719 if (first_instruction && INSN_P (insn)
6720 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6721 && GET_CODE (PATTERN (insn)) != USE
6722 && GET_CODE (PATTERN (insn)) != CLOBBER)
6725 first_instruction = 0;
6728 return need_barrier;
6731 /* Like group_barrier_needed, but do not clobber the current state. */
6734 safe_group_barrier_needed (rtx insn)
6736 int saved_first_instruction;
6739 saved_first_instruction = first_instruction;
6740 in_safe_group_barrier = 1;
6742 t = group_barrier_needed (insn);
6744 first_instruction = saved_first_instruction;
6745 in_safe_group_barrier = 0;
6750 /* Scan the current function and insert stop bits as necessary to
6751 eliminate dependencies. This function assumes that a final
6752 instruction scheduling pass has been run which has already
6753 inserted most of the necessary stop bits. This function only
6754 inserts new ones at basic block boundaries, since these are
6755 invisible to the scheduler. */
6758 emit_insn_group_barriers (FILE *dump)
6762 int insns_since_last_label = 0;
6764 init_insn_group_barriers ();
6766 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6768 if (GET_CODE (insn) == CODE_LABEL)
6770 if (insns_since_last_label)
6772 insns_since_last_label = 0;
6774 else if (GET_CODE (insn) == NOTE
6775 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6777 if (insns_since_last_label)
6779 insns_since_last_label = 0;
6781 else if (GET_CODE (insn) == INSN
6782 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6783 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6785 init_insn_group_barriers ();
6788 else if (NONDEBUG_INSN_P (insn))
6790 insns_since_last_label = 1;
6792 if (group_barrier_needed (insn))
6797 fprintf (dump, "Emitting stop before label %d\n",
6798 INSN_UID (last_label));
6799 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6802 init_insn_group_barriers ();
6810 /* Like emit_insn_group_barriers, but used when no final scheduling pass
6811 was run. This function has to emit all necessary group barriers. */
6814 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6818 init_insn_group_barriers ();
6820 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6822 if (GET_CODE (insn) == BARRIER)
6824 rtx last = prev_active_insn (insn);
6828 if (GET_CODE (last) == JUMP_INSN
6829 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6830 last = prev_active_insn (last);
6831 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6832 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6834 init_insn_group_barriers ();
6836 else if (NONDEBUG_INSN_P (insn))
6838 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6839 init_insn_group_barriers ();
6840 else if (group_barrier_needed (insn))
6842 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6843 init_insn_group_barriers ();
6844 group_barrier_needed (insn);
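/* Editor's sketch (illustrative, not verbatim GCC code): how the two
   drivers above are expected to divide the work in the machine-dependent
   reorg pass. emit_insn_group_barriers only patches basic-block
   boundaries after a final scheduling pass has placed most stop bits;
   emit_all_insn_group_barriers must place every stop bit itself. The
   guard variable name below is an assumption, not an identifier from
   this file. */
#if 0
if (optimize && final_schedule_was_run)
  emit_insn_group_barriers (dump_file);      /* fix up BB boundaries only */
else
  emit_all_insn_group_barriers (dump_file);  /* place all stop bits */
#endif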
6852 /* Instruction scheduling support. */
6854 #define NR_BUNDLES 10
6856 /* A list of names of all available bundles. */
6858 static const char *bundle_name [NR_BUNDLES] =
6864 #if NR_BUNDLES == 10
6874 /* Nonzero if we should insert stop bits into the schedule. */
6876 int ia64_final_schedule = 0;
6878 /* Codes of the corresponding queried units: */
6880 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6881 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6883 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6884 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6886 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6888 /* The following variable value is an insn group barrier. */
6890 static rtx dfa_stop_insn;
6892 /* The following variable value is the last issued insn. */
6894 static rtx last_scheduled_insn;
6896 /* The following variable value is a pointer to a DFA state used as
6897 a temporary variable. */
6899 static state_t temp_dfa_state = NULL;
6901 /* The following variable value is the DFA state after issuing the last
6904 static state_t prev_cycle_state = NULL;
6906 /* The following array element values are TRUE if the corresponding
6907 insn requires stop bits to be added before it. */
6909 static char *stops_p = NULL;
6911 /* The following variable is used to set up the array mentioned above. */
6913 static int stop_before_p = 0;
6915 /* The following variable value is the length of the arrays `clocks' and
6918 static int clocks_length;
6920 /* The following variable value is the number of data speculations in progress. */
6921 static int pending_data_specs = 0;
6923 /* Number of memory references on the current and three future processor cycles. */
6924 static char mem_ops_in_group[4];
6926 /* The current processor cycle number (from the scheduler's point of view). */
6927 static int current_cycle;
6929 static rtx ia64_single_set (rtx);
6930 static void ia64_emit_insn_before (rtx, rtx);
6932 /* Map a bundle number to its pseudo-op. */
6935 get_bundle_name (int b)
6937 return bundle_name[b];
6941 /* Return the maximum number of instructions a cpu can issue. */
6944 ia64_issue_rate (void)
6949 /* Helper function - like single_set, but look inside COND_EXEC. */
6952 ia64_single_set (rtx insn)
6954 rtx x = PATTERN (insn), ret;
6955 if (GET_CODE (x) == COND_EXEC)
6956 x = COND_EXEC_CODE (x);
6957 if (GET_CODE (x) == SET)
6960 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6961 Although they are not classical single sets, the second set is there just
6962 to protect the first from moving past FP-relative stack accesses. */
6963 switch (recog_memoized (insn))
6965 case CODE_FOR_prologue_allocate_stack:
6966 case CODE_FOR_epilogue_deallocate_stack:
6967 ret = XVECEXP (x, 0, 0);
6971 ret = single_set_2 (insn, x);
6978 /* Adjust the cost of a scheduling dependency.
6979 Return the new cost of the dependency of type DEP_TYPE of INSN on DEP_INSN.
6980 COST is the current cost, DW is dependency weakness. */
6982 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6984 enum reg_note dep_type = (enum reg_note) dep_type1;
6985 enum attr_itanium_class dep_class;
6986 enum attr_itanium_class insn_class;
6988 insn_class = ia64_safe_itanium_class (insn);
6989 dep_class = ia64_safe_itanium_class (dep_insn);
6991 /* Treat true memory dependencies separately. Ignore apparent true
6992 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6993 if (dep_type == REG_DEP_TRUE
6994 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6995 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6998 if (dw == MIN_DEP_WEAK)
6999 /* Store and load are likely to alias, use higher cost to avoid stall. */
7000 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7001 else if (dw > MIN_DEP_WEAK)
7003 /* Store and load are less likely to alias. */
7004 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7005 /* Assume there will be no cache conflict for floating-point data.
7006 For integer data, L1 conflict penalty is huge (17 cycles), so we
7007 never assume it will not cause a conflict. */
7013 if (dep_type != REG_DEP_OUTPUT)
7016 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7017 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
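/* Editor's sketch (illustrative only, not part of GCC): the
   store-to-load cost policy above restated as a hypothetical helper.
   FP_STORE stands for "the producer is an ITANIUM_CLASS_STF store";
   COST is the scheduler's incoming cost. */
#if 0
static int
example_mem_true_dep_cost (dw_t dw, int fp_store, int cost)
{
  if (dw == MIN_DEP_WEAK)
    /* Likely alias: charge the full memory true-dependence cost.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  if (dw > MIN_DEP_WEAK && fp_store && mflag_sched_fp_mem_deps_zero_cost)
    /* Unlikely alias and FP data: assume no L1 cache conflict.  */
    return 0;
  return cost;
}
#endif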
7023 /* Like emit_insn_before, but skip cycle_display notes.
7024 ??? When cycle display notes are implemented, update this. */
7027 ia64_emit_insn_before (rtx insn, rtx before)
7029 emit_insn_before (insn, before);
7032 /* The following function marks insns that produce addresses for load
7033 and store insns. Such insns will be placed into M slots because this
7034 decreases latency time for Itanium1 (see function
7035 `ia64_produce_address_p' and the DFA descriptions). */
7038 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
7040 rtx insn, next, next_tail;
7042 /* Before reload, which_alternative is not set, which means that
7043 ia64_safe_itanium_class will produce wrong results for (at least)
7044 move instructions. */
7045 if (!reload_completed)
7048 next_tail = NEXT_INSN (tail);
7049 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7052 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7054 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7056 sd_iterator_def sd_it;
7058 bool has_mem_op_consumer_p = false;
7060 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7062 enum attr_itanium_class c;
7064 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7067 next = DEP_CON (dep);
7068 c = ia64_safe_itanium_class (next);
7069 if ((c == ITANIUM_CLASS_ST
7070 || c == ITANIUM_CLASS_STF)
7071 && ia64_st_address_bypass_p (insn, next))
7073 has_mem_op_consumer_p = true;
7076 else if ((c == ITANIUM_CLASS_LD
7077 || c == ITANIUM_CLASS_FLD
7078 || c == ITANIUM_CLASS_FLDP)
7079 && ia64_ld_address_bypass_p (insn, next))
7081 has_mem_op_consumer_p = true;
7086 insn->call = has_mem_op_consumer_p;
7090 /* We're beginning a new block. Initialize data structures as necessary. */
7093 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7094 int sched_verbose ATTRIBUTE_UNUSED,
7095 int max_ready ATTRIBUTE_UNUSED)
7097 #ifdef ENABLE_CHECKING
7100 if (!sel_sched_p () && reload_completed)
7101 for (insn = NEXT_INSN (current_sched_info->prev_head);
7102 insn != current_sched_info->next_tail;
7103 insn = NEXT_INSN (insn))
7104 gcc_assert (!SCHED_GROUP_P (insn));
7106 last_scheduled_insn = NULL_RTX;
7107 init_insn_group_barriers ();
7110 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7113 /* We're beginning a scheduling pass. Check the assertion. */
7116 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7117 int sched_verbose ATTRIBUTE_UNUSED,
7118 int max_ready ATTRIBUTE_UNUSED)
7120 gcc_assert (pending_data_specs == 0);
7123 /* The scheduling pass is now finished. Free/reset the static variable. */
7125 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7126 int sched_verbose ATTRIBUTE_UNUSED)
7128 gcc_assert (pending_data_specs == 0);
7131 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7132 speculation check), FALSE otherwise. */
7134 is_load_p (rtx insn)
7136 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7139 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7140 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7143 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7144 array (taking into account the 3-cycle cache reference postponing for stores:
7145 Intel Itanium 2 Reference Manual for Software Development and Optimization,
7148 record_memory_reference (rtx insn)
7150 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7152 switch (insn_class) {
7153 case ITANIUM_CLASS_FLD:
7154 case ITANIUM_CLASS_LD:
7155 mem_ops_in_group[current_cycle % 4]++;
7157 case ITANIUM_CLASS_STF:
7158 case ITANIUM_CLASS_ST:
7159 mem_ops_in_group[(current_cycle + 3) % 4]++;
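/* Editor's worked example (illustrative, not part of GCC): with
   current_cycle == 5, a load is charged to ring slot 5 % 4 == 1, i.e.
   cycle 5 itself, while a store is charged to slot (5 + 3) % 4 == 0,
   the slot that will stand for cycle 8, modelling the store's delayed
   cache access. */
#if 0
current_cycle = 5;
mem_ops_in_group[current_cycle % 4]++;        /* load:  slot 1 (cycle 5) */
mem_ops_in_group[(current_cycle + 3) % 4]++;  /* store: slot 0 (cycle 8) */
#endif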
7165 /* We are about to begin issuing insns for this clock cycle.
7166 Override the default sort algorithm to better slot instructions. */
7169 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7170 int *pn_ready, int clock_var,
7174 int n_ready = *pn_ready;
7175 rtx *e_ready = ready + n_ready;
7179 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7181 if (reorder_type == 0)
7183 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7185 for (insnp = ready; insnp < e_ready; insnp++)
7186 if (insnp < e_ready)
7189 enum attr_type t = ia64_safe_type (insn);
7190 if (t == TYPE_UNKNOWN)
7192 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7193 || asm_noperands (PATTERN (insn)) >= 0)
7195 rtx lowest = ready[n_asms];
7196 ready[n_asms] = insn;
7202 rtx highest = ready[n_ready - 1];
7203 ready[n_ready - 1] = insn;
7210 if (n_asms < n_ready)
7212 /* Some normal insns to process. Skip the asms. */
7216 else if (n_ready > 0)
7220 if (ia64_final_schedule)
7223 int nr_need_stop = 0;
7225 for (insnp = ready; insnp < e_ready; insnp++)
7226 if (safe_group_barrier_needed (*insnp))
7229 if (reorder_type == 1 && n_ready == nr_need_stop)
7231 if (reorder_type == 0)
7234 /* Move down everything that needs a stop bit, preserving
7236 while (insnp-- > ready + deleted)
7237 while (insnp >= ready + deleted)
7240 if (! safe_group_barrier_needed (insn))
7242 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7250 current_cycle = clock_var;
7251 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7256 /* Move down loads/stores, preserving relative order. */
7257 while (insnp-- > ready + moved)
7258 while (insnp >= ready + moved)
7261 if (! is_load_p (insn))
7263 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7274 /* We are about to begin issuing insns for this clock cycle. Override
7275 the default sort algorithm to better slot instructions. */
7278 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7281 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7282 pn_ready, clock_var, 0);
7285 /* Like ia64_sched_reorder, but called after issuing each insn.
7286 Override the default sort algorithm to better slot instructions. */
7289 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7290 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7291 int *pn_ready, int clock_var)
7293 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7297 /* We are about to issue INSN. Return the number of insns left on the
7298 ready queue that can be issued this cycle. */
7301 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7302 int sched_verbose ATTRIBUTE_UNUSED,
7303 rtx insn ATTRIBUTE_UNUSED,
7304 int can_issue_more ATTRIBUTE_UNUSED)
7306 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7307 /* Modulo scheduling does not extend h_i_d when emitting
7308 new instructions. Don't use h_i_d if we don't have to. */
7310 if (DONE_SPEC (insn) & BEGIN_DATA)
7311 pending_data_specs++;
7312 if (CHECK_SPEC (insn) & BEGIN_DATA)
7313 pending_data_specs--;
7316 if (DEBUG_INSN_P (insn))
7319 last_scheduled_insn = insn;
7320 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7321 if (reload_completed)
7323 int needed = group_barrier_needed (insn);
7325 gcc_assert (!needed);
7326 if (GET_CODE (insn) == CALL_INSN)
7327 init_insn_group_barriers ();
7328 stops_p [INSN_UID (insn)] = stop_before_p;
7331 record_memory_reference (insn);
7336 /* We are choosing an insn from the ready queue. Return nonzero if INSN
7340 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7342 gcc_assert (insn && INSN_P (insn));
7343 return ((!reload_completed
7344 || !safe_group_barrier_needed (insn))
7345 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7346 && (!mflag_sched_mem_insns_hard_limit
7347 || !is_load_p (insn)
7348 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7351 /* We are choosing an insn from the ready queue. Return nonzero if INSN
7355 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7357 gcc_assert (insn && INSN_P (insn));
7358 /* The size of the ALAT is 32. As long as we perform conservative data
7359 speculation, we keep the ALAT half empty. */
7360 return (pending_data_specs < 16
7361 || !(TODO_SPEC (insn) & BEGIN_DATA));
7364 /* The following variable value is a pseudo-insn used by the DFA insn
7365 scheduler to change the DFA state when the simulated clock is
7368 static rtx dfa_pre_cycle_insn;
7370 /* Returns 1 when a meaningful insn was scheduled between the last group
7371 barrier and LAST. */
7373 scheduled_good_insn (rtx last)
7375 if (last && recog_memoized (last) >= 0)
7379 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7380 && !stops_p[INSN_UID (last)];
7381 last = PREV_INSN (last))
7382 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7383 the ebb we're scheduling. */
7384 if (INSN_P (last) && recog_memoized (last) >= 0)
7390 /* We are about to begin issuing INSN. Return nonzero if we cannot
7391 issue it on the given cycle CLOCK, and return zero if we should not sort
7392 the ready queue on the next clock start. */
7395 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7396 int clock, int *sort_p)
7398 gcc_assert (insn && INSN_P (insn));
7400 if (DEBUG_INSN_P (insn))
7403 /* When a group barrier is needed for an insn, last_scheduled_insn
7405 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7406 || last_scheduled_insn);
7408 if ((reload_completed
7409 && (safe_group_barrier_needed (insn)
7410 || (mflag_sched_stop_bits_after_every_cycle
7411 && last_clock != clock
7412 && last_scheduled_insn
7413 && scheduled_good_insn (last_scheduled_insn))))
7414 || (last_scheduled_insn
7415 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7416 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7417 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7419 init_insn_group_barriers ();
7421 if (verbose && dump)
7422 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7423 last_clock == clock ? " + cycle advance" : "");
7426 current_cycle = clock;
7427 mem_ops_in_group[current_cycle % 4] = 0;
7429 if (last_clock == clock)
7431 state_transition (curr_state, dfa_stop_insn);
7432 if (TARGET_EARLY_STOP_BITS)
7433 *sort_p = (last_scheduled_insn == NULL_RTX
7434 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7440 if (last_scheduled_insn)
7442 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7443 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7444 state_reset (curr_state);
7447 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7448 state_transition (curr_state, dfa_stop_insn);
7449 state_transition (curr_state, dfa_pre_cycle_insn);
7450 state_transition (curr_state, NULL);
7457 /* Implement targetm.sched.h_i_d_extended hook.
7458 Extend internal data structures. */
7460 ia64_h_i_d_extended (void)
7462 if (stops_p != NULL)
7464 int new_clocks_length = get_max_uid () * 3 / 2;
7465 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7466 clocks_length = new_clocks_length;
7471 /* This structure describes the data used by the backend to guide scheduling.
7472 When the current scheduling point is switched, this data should be saved
7473 and restored later, if the scheduler returns to this point. */
7474 struct _ia64_sched_context
7476 state_t prev_cycle_state;
7477 rtx last_scheduled_insn;
7478 struct reg_write_state rws_sum[NUM_REGS];
7479 struct reg_write_state rws_insn[NUM_REGS];
7480 int first_instruction;
7481 int pending_data_specs;
7483 char mem_ops_in_group[4];
7485 typedef struct _ia64_sched_context *ia64_sched_context_t;
7487 /* Allocates a scheduling context. */
7489 ia64_alloc_sched_context (void)
7491 return xmalloc (sizeof (struct _ia64_sched_context));
7494 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7495 the global context otherwise. */
7497 ia64_init_sched_context (void *_sc, bool clean_p)
7499 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7501 sc->prev_cycle_state = xmalloc (dfa_state_size);
7504 state_reset (sc->prev_cycle_state);
7505 sc->last_scheduled_insn = NULL_RTX;
7506 memset (sc->rws_sum, 0, sizeof (rws_sum));
7507 memset (sc->rws_insn, 0, sizeof (rws_insn));
7508 sc->first_instruction = 1;
7509 sc->pending_data_specs = 0;
7510 sc->current_cycle = 0;
7511 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7515 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7516 sc->last_scheduled_insn = last_scheduled_insn;
7517 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7518 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7519 sc->first_instruction = first_instruction;
7520 sc->pending_data_specs = pending_data_specs;
7521 sc->current_cycle = current_cycle;
7522 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7526 /* Sets the global scheduling context to the one pointed to by _SC. */
7528 ia64_set_sched_context (void *_sc)
7530 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7532 gcc_assert (sc != NULL);
7534 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7535 last_scheduled_insn = sc->last_scheduled_insn;
7536 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7537 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7538 first_instruction = sc->first_instruction;
7539 pending_data_specs = sc->pending_data_specs;
7540 current_cycle = sc->current_cycle;
7541 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7544 /* Clears the data in the _SC scheduling context. */
7546 ia64_clear_sched_context (void *_sc)
7548 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7550 free (sc->prev_cycle_state);
7551 sc->prev_cycle_state = NULL;
7554 /* Frees the _SC scheduling context. */
7556 ia64_free_sched_context (void *_sc)
7558 gcc_assert (_sc != NULL);
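/* Editor's sketch (illustrative, not part of GCC): the intended
   lifecycle of a scheduling context through the five hooks above. The
   real calls come from the selective scheduler via the
   targetm.sched.*_sched_context hooks; the exact sequence below is an
   assumption drawn from the hook semantics. */
#if 0
void *sc = ia64_alloc_sched_context ();
ia64_init_sched_context (sc, false);  /* snapshot the global state */
/* ... the scheduler explores a different scheduling point ... */
ia64_set_sched_context (sc);          /* restore the saved state */
ia64_clear_sched_context (sc);
ia64_free_sched_context (sc);
#endif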
7563 typedef rtx (* gen_func_t) (rtx, rtx);
7565 /* Return a function that will generate a load of mode MODE_NO
7566 with speculation types TS. */
7568 get_spec_load_gen_function (ds_t ts, int mode_no)
7570 static gen_func_t gen_ld_[] = {
7580 gen_zero_extendqidi2,
7581 gen_zero_extendhidi2,
7582 gen_zero_extendsidi2,
7585 static gen_func_t gen_ld_a[] = {
7595 gen_zero_extendqidi2_advanced,
7596 gen_zero_extendhidi2_advanced,
7597 gen_zero_extendsidi2_advanced,
7599 static gen_func_t gen_ld_s[] = {
7600 gen_movbi_speculative,
7601 gen_movqi_speculative,
7602 gen_movhi_speculative,
7603 gen_movsi_speculative,
7604 gen_movdi_speculative,
7605 gen_movsf_speculative,
7606 gen_movdf_speculative,
7607 gen_movxf_speculative,
7608 gen_movti_speculative,
7609 gen_zero_extendqidi2_speculative,
7610 gen_zero_extendhidi2_speculative,
7611 gen_zero_extendsidi2_speculative,
7613 static gen_func_t gen_ld_sa[] = {
7614 gen_movbi_speculative_advanced,
7615 gen_movqi_speculative_advanced,
7616 gen_movhi_speculative_advanced,
7617 gen_movsi_speculative_advanced,
7618 gen_movdi_speculative_advanced,
7619 gen_movsf_speculative_advanced,
7620 gen_movdf_speculative_advanced,
7621 gen_movxf_speculative_advanced,
7622 gen_movti_speculative_advanced,
7623 gen_zero_extendqidi2_speculative_advanced,
7624 gen_zero_extendhidi2_speculative_advanced,
7625 gen_zero_extendsidi2_speculative_advanced,
7627 static gen_func_t gen_ld_s_a[] = {
7628 gen_movbi_speculative_a,
7629 gen_movqi_speculative_a,
7630 gen_movhi_speculative_a,
7631 gen_movsi_speculative_a,
7632 gen_movdi_speculative_a,
7633 gen_movsf_speculative_a,
7634 gen_movdf_speculative_a,
7635 gen_movxf_speculative_a,
7636 gen_movti_speculative_a,
7637 gen_zero_extendqidi2_speculative_a,
7638 gen_zero_extendhidi2_speculative_a,
7639 gen_zero_extendsidi2_speculative_a,
7644 if (ts & BEGIN_DATA)
7646 if (ts & BEGIN_CONTROL)
7651 else if (ts & BEGIN_CONTROL)
7653 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7654 || ia64_needs_block_p (ts))
7657 gen_ld = gen_ld_s_a;
7664 return gen_ld[mode_no];
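/* Editor's usage sketch (illustrative, not part of GCC): requesting a
   generator for a purely control-speculative DImode load. DEST and
   SRC_MEM are hypothetical operands; index 4 is DImode, per
   ia64_mode_to_int below. Depending on the speculation flags this
   yields one of the gen_movdi_speculative* variants (an ld.s). */
#if 0
gen_func_t gen = get_spec_load_gen_function (BEGIN_CONTROL, 4);
rtx pat = gen (dest, src_mem);
#endif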
7667 /* Constants that help map 'enum machine_mode' to int. */
7670 SPEC_MODE_INVALID = -1,
7671 SPEC_MODE_FIRST = 0,
7672 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7673 SPEC_MODE_FOR_EXTEND_LAST = 3,
7679 /* Offset to reach ZERO_EXTEND patterns. */
7680 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7683 /* Return the index of MODE. */
7685 ia64_mode_to_int (enum machine_mode mode)
7689 case BImode: return 0; /* SPEC_MODE_FIRST */
7690 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7691 case HImode: return 2;
7692 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7693 case DImode: return 4;
7694 case SFmode: return 5;
7695 case DFmode: return 6;
7696 case XFmode: return 7;
7698 /* ??? This mode needs testing. Bypasses for the ldfp8 instruction are not
7699 mentioned in itanium[12].md. Predicate fp_register_operand also
7700 needs to be defined. Bottom line: better disable for now. */
7701 return SPEC_MODE_INVALID;
7702 default: return SPEC_MODE_INVALID;
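/* Editor's worked example (illustrative, not part of GCC): the gen_ld_*
   tables above hold nine move generators at indices 0-8 followed by
   three zero-extend generators at indices 9-11, so
   SPEC_GEN_EXTEND_OFFSET evaluates to 8. A zero-extending QImode load
   therefore maps to 1 + 8 == 9, the gen_zero_extendqidi2* slot;
   get_mode_no_for_insn below applies exactly this offset. */
#if 0
int mode_no = ia64_mode_to_int (QImode);  /* 1 */
mode_no += SPEC_GEN_EXTEND_OFFSET;        /* 1 + 8 == 9 */
#endif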
7706 /* Provide information about speculation capabilities. */
7708 ia64_set_sched_flags (spec_info_t spec_info)
7710 unsigned int *flags = &(current_sched_info->flags);
7712 if (*flags & SCHED_RGN
7713 || *flags & SCHED_EBB
7714 || *flags & SEL_SCHED)
7718 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7719 || (mflag_sched_ar_data_spec && reload_completed))
7724 && ((mflag_sched_br_in_data_spec && !reload_completed)
7725 || (mflag_sched_ar_in_data_spec && reload_completed)))
7729 if (mflag_sched_control_spec
7731 || reload_completed))
7733 mask |= BEGIN_CONTROL;
7735 if (!sel_sched_p () && mflag_sched_in_control_spec)
7736 mask |= BE_IN_CONTROL;
7739 spec_info->mask = mask;
7743 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7745 if (mask & BE_IN_SPEC)
7748 spec_info->flags = 0;
7750 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7751 spec_info->flags |= PREFER_NON_DATA_SPEC;
7753 if (mask & CONTROL_SPEC)
7755 if (mflag_sched_prefer_non_control_spec_insns)
7756 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7758 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7759 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7762 if (sched_verbose >= 1)
7763 spec_info->dump = sched_dump;
7765 spec_info->dump = 0;
7767 if (mflag_sched_count_spec_in_critical_path)
7768 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7772 spec_info->mask = 0;
7775 /* If INSN is an appropriate load, return its mode.
7776 Return -1 otherwise. */
7778 get_mode_no_for_insn (rtx insn)
7780 rtx reg, mem, mode_rtx;
7784 extract_insn_cached (insn);
7786 /* We use WHICH_ALTERNATIVE only after reload. This will
7787 guarantee that reload won't touch a speculative insn. */
7789 if (recog_data.n_operands != 2)
7792 reg = recog_data.operand[0];
7793 mem = recog_data.operand[1];
7795 /* We should use MEM's mode since REG's mode in the presence of
7796 ZERO_EXTEND will always be DImode. */
7797 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7798 /* Process non-speculative ld. */
7800 if (!reload_completed)
7802 /* Do not speculate into regs like ar.lc. */
7803 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7810 rtx mem_reg = XEXP (mem, 0);
7812 if (!REG_P (mem_reg))
7818 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7820 gcc_assert (REG_P (reg) && MEM_P (mem));
7826 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7827 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7828 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7829 /* Process speculative ld or ld.c. */
7831 gcc_assert (REG_P (reg) && MEM_P (mem));
7836 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7838 if (attr_class == ITANIUM_CLASS_CHK_A
7839 || attr_class == ITANIUM_CLASS_CHK_S_I
7840 || attr_class == ITANIUM_CLASS_CHK_S_F)
7847 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7849 if (mode_no == SPEC_MODE_INVALID)
7852 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7856 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7857 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7860 mode_no += SPEC_GEN_EXTEND_OFFSET;
7866 /* If X is an unspec part of a speculative load, return its code.
7867 Return -1 otherwise. */
7869 get_spec_unspec_code (const_rtx x)
7871 if (GET_CODE (x) != UNSPEC)
7893 /* Implement skip_rtx_p hook. */
7895 ia64_skip_rtx_p (const_rtx x)
7897 return get_spec_unspec_code (x) != -1;
7900 /* If INSN is a speculative load, return its UNSPEC code.
7901 Return -1 otherwise. */
7903 get_insn_spec_code (const_rtx insn)
7907 pat = PATTERN (insn);
7909 if (GET_CODE (pat) == COND_EXEC)
7910 pat = COND_EXEC_CODE (pat);
7912 if (GET_CODE (pat) != SET)
7915 reg = SET_DEST (pat);
7919 mem = SET_SRC (pat);
7920 if (GET_CODE (mem) == ZERO_EXTEND)
7921 mem = XEXP (mem, 0);
7923 return get_spec_unspec_code (mem);
7926 /* If INSN is a speculative load, return a ds with the speculation types.
7927 Otherwise [if INSN is a normal instruction] return 0. */
7929 ia64_get_insn_spec_ds (rtx insn)
7931 int code = get_insn_spec_code (insn);
7940 return BEGIN_CONTROL;
7943 return BEGIN_DATA | BEGIN_CONTROL;
7950 /* If INSN is a speculative load, return a ds with the speculation types that
7952 Otherwise [if INSN is a normal instruction] return 0. */
7954 ia64_get_insn_checked_ds (rtx insn)
7956 int code = get_insn_spec_code (insn);
7961 return BEGIN_DATA | BEGIN_CONTROL;
7964 return BEGIN_CONTROL;
7968 return BEGIN_DATA | BEGIN_CONTROL;
7975 /* Generate a speculative load pattern for INSN with speculation type TS
7976 and machine mode index MODE_NO (zero-extending variants are selected
7977 through MODE_NO; see SPEC_GEN_EXTEND_OFFSET above). */
7980 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7983 gen_func_t gen_load;
7985 gen_load = get_spec_load_gen_function (ts, mode_no);
7987 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7988 copy_rtx (recog_data.operand[1]));
7990 pat = PATTERN (insn);
7991 if (GET_CODE (pat) == COND_EXEC)
7992 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7999 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8000 ds_t ds ATTRIBUTE_UNUSED)
8005 /* Implement targetm.sched.speculate_insn hook.
8006 Check if the INSN can be TS speculative.
8007 If 'no' - return -1.
8008 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8009 If current pattern of the INSN already provides TS speculation,
8012 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
8017 gcc_assert (!(ts & ~SPECULATIVE));
8019 if (ia64_spec_check_p (insn))
8022 if ((ts & BE_IN_SPEC)
8023 && !insn_can_be_in_speculative_p (insn, ts))
8026 mode_no = get_mode_no_for_insn (insn);
8028 if (mode_no != SPEC_MODE_INVALID)
8030 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8035 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8044 /* Return a function that will generate a check for speculation TS with mode
8046 If a simple check is needed, pass true for SIMPLE_CHECK_P.
8047 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8049 get_spec_check_gen_function (ds_t ts, int mode_no,
8050 bool simple_check_p, bool clearing_check_p)
8052 static gen_func_t gen_ld_c_clr[] = {
8062 gen_zero_extendqidi2_clr,
8063 gen_zero_extendhidi2_clr,
8064 gen_zero_extendsidi2_clr,
8066 static gen_func_t gen_ld_c_nc[] = {
8076 gen_zero_extendqidi2_nc,
8077 gen_zero_extendhidi2_nc,
8078 gen_zero_extendsidi2_nc,
8080 static gen_func_t gen_chk_a_clr[] = {
8081 gen_advanced_load_check_clr_bi,
8082 gen_advanced_load_check_clr_qi,
8083 gen_advanced_load_check_clr_hi,
8084 gen_advanced_load_check_clr_si,
8085 gen_advanced_load_check_clr_di,
8086 gen_advanced_load_check_clr_sf,
8087 gen_advanced_load_check_clr_df,
8088 gen_advanced_load_check_clr_xf,
8089 gen_advanced_load_check_clr_ti,
8090 gen_advanced_load_check_clr_di,
8091 gen_advanced_load_check_clr_di,
8092 gen_advanced_load_check_clr_di,
8094 static gen_func_t gen_chk_a_nc[] = {
8095 gen_advanced_load_check_nc_bi,
8096 gen_advanced_load_check_nc_qi,
8097 gen_advanced_load_check_nc_hi,
8098 gen_advanced_load_check_nc_si,
8099 gen_advanced_load_check_nc_di,
8100 gen_advanced_load_check_nc_sf,
8101 gen_advanced_load_check_nc_df,
8102 gen_advanced_load_check_nc_xf,
8103 gen_advanced_load_check_nc_ti,
8104 gen_advanced_load_check_nc_di,
8105 gen_advanced_load_check_nc_di,
8106 gen_advanced_load_check_nc_di,
8108 static gen_func_t gen_chk_s[] = {
8109 gen_speculation_check_bi,
8110 gen_speculation_check_qi,
8111 gen_speculation_check_hi,
8112 gen_speculation_check_si,
8113 gen_speculation_check_di,
8114 gen_speculation_check_sf,
8115 gen_speculation_check_df,
8116 gen_speculation_check_xf,
8117 gen_speculation_check_ti,
8118 gen_speculation_check_di,
8119 gen_speculation_check_di,
8120 gen_speculation_check_di,
8123 gen_func_t *gen_check;
8125 if (ts & BEGIN_DATA)
8127 /* We don't need recovery because, even if this is an ld.sa, the
8128 ALAT entry will be allocated only if the NAT bit is set to zero.
8129 So it is enough to use ld.c here. */
8133 gcc_assert (mflag_sched_spec_ldc);
8135 if (clearing_check_p)
8136 gen_check = gen_ld_c_clr;
8138 gen_check = gen_ld_c_nc;
8142 if (clearing_check_p)
8143 gen_check = gen_chk_a_clr;
8145 gen_check = gen_chk_a_nc;
8148 else if (ts & BEGIN_CONTROL)
8151 /* We might want to use ld.sa -> ld.c instead of
8154 gcc_assert (!ia64_needs_block_p (ts));
8156 if (clearing_check_p)
8157 gen_check = gen_ld_c_clr;
8159 gen_check = gen_ld_c_nc;
8163 gen_check = gen_chk_s;
8169 gcc_assert (mode_no >= 0);
8170 return gen_check[mode_no];
8173 /* Return nonzero if speculation of type TS needs a branchy recovery check. */
8175 ia64_needs_block_p (ds_t ts)
8177 if (ts & BEGIN_DATA)
8178 return !mflag_sched_spec_ldc;
8180 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8182 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8185 /* Generate a recovery check for INSN.
8186 If LABEL is nonzero, generate a branchy recovery check.
8187 Otherwise, generate a simple check. */
8189 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8191 rtx op1, pat, check_pat;
8192 gen_func_t gen_check;
8195 mode_no = get_mode_no_for_insn (insn);
8196 gcc_assert (mode_no >= 0);
8202 gcc_assert (!ia64_needs_block_p (ds));
8203 op1 = copy_rtx (recog_data.operand[1]);
8206 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8209 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8211 pat = PATTERN (insn);
8212 if (GET_CODE (pat) == COND_EXEC)
8213 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8219 /* Return nonzero if X is a branchy recovery check. */
8221 ia64_spec_check_p (rtx x)
8224 if (GET_CODE (x) == COND_EXEC)
8225 x = COND_EXEC_CODE (x);
8226 if (GET_CODE (x) == SET)
8227 return ia64_spec_check_src_p (SET_SRC (x));
8231 /* Return nonzero if SRC belongs to a recovery check. */
8233 ia64_spec_check_src_p (rtx src)
8235 if (GET_CODE (src) == IF_THEN_ELSE)
8240 if (GET_CODE (t) == NE)
8244 if (GET_CODE (t) == UNSPEC)
8250 if (code == UNSPEC_LDCCLR
8251 || code == UNSPEC_LDCNC
8252 || code == UNSPEC_CHKACLR
8253 || code == UNSPEC_CHKANC
8254 || code == UNSPEC_CHKS)
8256 gcc_assert (code != 0);
8266 /* The following page contains abstract data `bundle states' which are
8267 used for bundling insns (inserting nops and generating templates). */
8269 /* The following describes the state of insn bundling. */
8273 /* Unique bundle state number to identify them in the debugging
8276 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8277 /* number nops before and after the insn */
8278 short before_nops_num, after_nops_num;
8279 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8281 int cost; /* cost of the state in cycles */
8282 int accumulated_insns_num; /* number of all previous insns including
8283 nops; an L insn is counted as 2 insns */
8284 int branch_deviation; /* deviation of previous branches from 3rd slots */
8285 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8286 struct bundle_state *next; /* next state with the same insn_num */
8287 struct bundle_state *originator; /* originator (previous insn state) */
8288 /* All bundle states are in the following chain. */
8289 struct bundle_state *allocated_states_chain;
8290 /* The DFA State after issuing the insn and the nops. */
8294 /* The following maps an insn number to the corresponding bundle state. */
8296 static struct bundle_state **index_to_bundle_states;
8298 /* The unique number of next bundle state. */
8300 static int bundle_states_num;
8302 /* All allocated bundle states are in the following chain. */
8304 static struct bundle_state *allocated_bundle_states_chain;
8306 /* All allocated but not used bundle states are in the following
8309 static struct bundle_state *free_bundle_state_chain;
8312 /* The following function returns a free bundle state. */
8314 static struct bundle_state *
8315 get_free_bundle_state (void)
8317 struct bundle_state *result;
8319 if (free_bundle_state_chain != NULL)
8321 result = free_bundle_state_chain;
8322 free_bundle_state_chain = result->next;
8326 result = XNEW (struct bundle_state);
8327 result->dfa_state = xmalloc (dfa_state_size);
8328 result->allocated_states_chain = allocated_bundle_states_chain;
8329 allocated_bundle_states_chain = result;
8331 result->unique_num = bundle_states_num++;
8336 /* The following function frees the given bundle state. */
8339 free_bundle_state (struct bundle_state *state)
8341 state->next = free_bundle_state_chain;
8342 free_bundle_state_chain = state;
8345 /* Start work with abstract data `bundle states'. */
8348 initiate_bundle_states (void)
8350 bundle_states_num = 0;
8351 free_bundle_state_chain = NULL;
8352 allocated_bundle_states_chain = NULL;
8355 /* Finish work with abstract data `bundle states'. */
8358 finish_bundle_states (void)
8360 struct bundle_state *curr_state, *next_state;
8362 for (curr_state = allocated_bundle_states_chain;
8364 curr_state = next_state)
8366 next_state = curr_state->allocated_states_chain;
8367 free (curr_state->dfa_state);
8372 /* Hash table of the bundle states. The key is dfa_state and insn_num
8373 of the bundle states. */
8375 static htab_t bundle_state_table;
8377 /* The function returns the hash of BUNDLE_STATE. */
8380 bundle_state_hash (const void *bundle_state)
8382 const struct bundle_state *const state
8383 = (const struct bundle_state *) bundle_state;
8386 for (result = i = 0; i < dfa_state_size; i++)
8387 result += (((unsigned char *) state->dfa_state) [i]
8388 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
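/* Editor's note: each byte of the DFA state is mixed in at a shift
   that cycles with the byte index, spreading the state bytes across
   the hash value; insn_num is added so that identical DFA states
   reached after different numbers of insns hash differently. */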
8389 return result + state->insn_num;
8392 /* The function returns nonzero if the bundle state keys are equal. */
8395 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8397 const struct bundle_state *const state1
8398 = (const struct bundle_state *) bundle_state_1;
8399 const struct bundle_state *const state2
8400 = (const struct bundle_state *) bundle_state_2;
8402 return (state1->insn_num == state2->insn_num
8403 && memcmp (state1->dfa_state, state2->dfa_state,
8404 dfa_state_size) == 0);
8407 /* The function inserts the BUNDLE_STATE into the hash table. The
8408 function returns nonzero if the bundle has been inserted into the
8409 table. The table contains the best bundle state for a given key. */
8412 insert_bundle_state (struct bundle_state *bundle_state)
8416 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8417 if (*entry_ptr == NULL)
8419 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8420 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8421 *entry_ptr = (void *) bundle_state;
8424 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8425 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8426 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8427 > bundle_state->accumulated_insns_num
8428 || (((struct bundle_state *)
8429 *entry_ptr)->accumulated_insns_num
8430 == bundle_state->accumulated_insns_num
8431 && (((struct bundle_state *)
8432 *entry_ptr)->branch_deviation
8433 > bundle_state->branch_deviation
8434 || (((struct bundle_state *)
8435 *entry_ptr)->branch_deviation
8436 == bundle_state->branch_deviation
8437 && ((struct bundle_state *)
8438 *entry_ptr)->middle_bundle_stops
8439 > bundle_state->middle_bundle_stops))))))
8442 struct bundle_state temp;
8444 temp = *(struct bundle_state *) *entry_ptr;
8445 *(struct bundle_state *) *entry_ptr = *bundle_state;
8446 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8447 *bundle_state = temp;
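/* Editor's note: the nested conditional above is a lexicographic
   "better than" test. An equivalent, easier-to-read formulation
   (illustrative only, not part of GCC): */
#if 0
static int
example_bundle_state_better_p (const struct bundle_state *a,
                               const struct bundle_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif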
8452 /* Start work with the hash table. */
8455 initiate_bundle_state_table (void)
8457 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8461 /* Finish work with the hash table. */
8464 finish_bundle_state_table (void)
8466 htab_delete (bundle_state_table);
8471 /* The following variable is an insn `nop' used to check bundle states
8472 with different numbers of inserted nops. */
8474 static rtx ia64_nop;
8476 /* The following function tries to issue NOPS_NUM nops for the current
8477 state without advancing the processor cycle. If it fails, the
8478 function returns FALSE and frees the current state. */
8481 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8485 for (i = 0; i < nops_num; i++)
8486 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8488 free_bundle_state (curr_state);
8494 /* The following function tries to issue INSN for the current
8495 state without advancing the processor cycle. If it fails, the
8496 function returns FALSE and frees the current state. */
8499 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8501 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8503 free_bundle_state (curr_state);
8509 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8510 starting from ORIGINATOR without advancing the processor cycle. If
8511 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8512 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8513 If successful, the function creates a new bundle state and
8514 inserts it into the hash table and into `index_to_bundle_states'. */
8517 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8518 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8520 struct bundle_state *curr_state;
8522 curr_state = get_free_bundle_state ();
8523 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8524 curr_state->insn = insn;
8525 curr_state->insn_num = originator->insn_num + 1;
8526 curr_state->cost = originator->cost;
8527 curr_state->originator = originator;
8528 curr_state->before_nops_num = before_nops_num;
8529 curr_state->after_nops_num = 0;
8530 curr_state->accumulated_insns_num
8531 = originator->accumulated_insns_num + before_nops_num;
8532 curr_state->branch_deviation = originator->branch_deviation;
8533 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8535 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8537 gcc_assert (GET_MODE (insn) != TImode);
8538 if (!try_issue_nops (curr_state, before_nops_num))
8540 if (!try_issue_insn (curr_state, insn))
8542 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8543 if (curr_state->accumulated_insns_num % 3 != 0)
8544 curr_state->middle_bundle_stops++;
8545 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8546 && curr_state->accumulated_insns_num % 3 != 0)
8548 free_bundle_state (curr_state);
8552 else if (GET_MODE (insn) != TImode)
8554 if (!try_issue_nops (curr_state, before_nops_num))
8556 if (!try_issue_insn (curr_state, insn))
8558 curr_state->accumulated_insns_num++;
8559 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8560 && asm_noperands (PATTERN (insn)) < 0);
8562 if (ia64_safe_type (insn) == TYPE_L)
8563 curr_state->accumulated_insns_num++;
8567 /* If this is an insn that must be first in a group, then don't allow
8568 nops to be emitted before it. Currently, alloc is the only such
8569 supported instruction. */
8570 /* ??? The bundling automatons should handle this for us, but they do
8571 not yet have support for the first_insn attribute. */
8572 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8574 free_bundle_state (curr_state);
8578 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8579 state_transition (curr_state->dfa_state, NULL);
8581 if (!try_issue_nops (curr_state, before_nops_num))
8583 if (!try_issue_insn (curr_state, insn))
8585 curr_state->accumulated_insns_num++;
8586 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8587 || asm_noperands (PATTERN (insn)) >= 0)
8589 /* Finish bundle containing asm insn. */
8590 curr_state->after_nops_num
8591 = 3 - curr_state->accumulated_insns_num % 3;
8592 curr_state->accumulated_insns_num
8593 += 3 - curr_state->accumulated_insns_num % 3;
8595 else if (ia64_safe_type (insn) == TYPE_L)
8596 curr_state->accumulated_insns_num++;
8598 if (ia64_safe_type (insn) == TYPE_B)
8599 curr_state->branch_deviation
8600 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
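/* Editor's note: (accumulated_insns_num - 1) % 3 is the slot (0-2)
   the branch just landed in, so the deviation added is 0 for the
   preferred third slot and 2 for the first slot. */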
8601 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8603 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8606 struct bundle_state *curr_state1;
8607 struct bundle_state *allocated_states_chain;
8609 curr_state1 = get_free_bundle_state ();
8610 dfa_state = curr_state1->dfa_state;
8611 allocated_states_chain = curr_state1->allocated_states_chain;
8612 *curr_state1 = *curr_state;
8613 curr_state1->dfa_state = dfa_state;
8614 curr_state1->allocated_states_chain = allocated_states_chain;
8615 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8617 curr_state = curr_state1;
8619 if (!try_issue_nops (curr_state,
8620 3 - curr_state->accumulated_insns_num % 3))
8622 curr_state->after_nops_num
8623 = 3 - curr_state->accumulated_insns_num % 3;
8624 curr_state->accumulated_insns_num
8625 += 3 - curr_state->accumulated_insns_num % 3;
8627 if (!insert_bundle_state (curr_state))
8628 free_bundle_state (curr_state);
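/* Editor's worked example (illustrative, not part of GCC) for the
   "3 - n % 3" padding used above: this path is only taken when
   accumulated_insns_num % 3 != 0, e.g. n == 4 (one insn into the
   second bundle) needs 3 - 4 % 3 == 2 nops, giving 6 insns, i.e. two
   complete bundles. */
#if 0
int n = 4;
int pad = 3 - n % 3;  /* 2 */
n += pad;             /* 6, a multiple of 3: the bundle is now full */
#endif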
8632 /* The following function returns the position in the two-window bundle
8636 get_max_pos (state_t state)
8638 if (cpu_unit_reservation_p (state, pos_6))
8640 else if (cpu_unit_reservation_p (state, pos_5))
8642 else if (cpu_unit_reservation_p (state, pos_4))
8644 else if (cpu_unit_reservation_p (state, pos_3))
8646 else if (cpu_unit_reservation_p (state, pos_2))
8648 else if (cpu_unit_reservation_p (state, pos_1))
8654 /* The function returns the code of a possible template for the given
8655 position and state. The function should be called only with two values
8656 of position, 3 or 6. We avoid generating F NOPs by putting
8657 templates containing F insns at the end of the template search,
8658 because of an undocumented anomaly in McKinley-derived cores which can
8659 cause stalls if an F-unit insn (including a NOP) is issued within a
8660 six-cycle window after reading certain application registers (such
8661 as ar.bsp). Furthermore, power considerations also argue against
8662 the use of F-unit instructions unless they're really needed. */
8665 get_template (state_t state, int pos)
8670 if (cpu_unit_reservation_p (state, _0mmi_))
8672 else if (cpu_unit_reservation_p (state, _0mii_))
8674 else if (cpu_unit_reservation_p (state, _0mmb_))
8676 else if (cpu_unit_reservation_p (state, _0mib_))
8678 else if (cpu_unit_reservation_p (state, _0mbb_))
8680 else if (cpu_unit_reservation_p (state, _0bbb_))
8682 else if (cpu_unit_reservation_p (state, _0mmf_))
8684 else if (cpu_unit_reservation_p (state, _0mfi_))
8686 else if (cpu_unit_reservation_p (state, _0mfb_))
8688 else if (cpu_unit_reservation_p (state, _0mlx_))
8693 if (cpu_unit_reservation_p (state, _1mmi_))
8695 else if (cpu_unit_reservation_p (state, _1mii_))
8697 else if (cpu_unit_reservation_p (state, _1mmb_))
8699 else if (cpu_unit_reservation_p (state, _1mib_))
8701 else if (cpu_unit_reservation_p (state, _1mbb_))
8703 else if (cpu_unit_reservation_p (state, _1bbb_))
8705 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8707 else if (cpu_unit_reservation_p (state, _1mfi_))
8709 else if (cpu_unit_reservation_p (state, _1mfb_))
8711 else if (cpu_unit_reservation_p (state, _1mlx_))
8720 /* True when INSN is important for bundling. */
8722 important_for_bundling_p (rtx insn)
8724 return (INSN_P (insn)
8725 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8726 && GET_CODE (PATTERN (insn)) != USE
8727 && GET_CODE (PATTERN (insn)) != CLOBBER);
8730 /* The following function returns the next insn important for insn
8731 bundling, starting at INSN and stopping before TAIL. */
8734 get_next_important_insn (rtx insn, rtx tail)
8736 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8737 if (important_for_bundling_p (insn))
8742 /* Add a bundle selector TEMPLATE0 before INSN. */
8745 ia64_add_bundle_selector_before (int template0, rtx insn)
8747 rtx b = gen_bundle_selector (GEN_INT (template0));
8749 ia64_emit_insn_before (b, insn);
8750 #if NR_BUNDLES == 10
8751 if ((template0 == 4 || template0 == 5)
8752 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8755 rtx note = NULL_RTX;
8757 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8758 first or second slot. If it is and has a REG_EH_REGION note, copy it
8759 to the following nops, as br.call sets rp to the address of the following
8760 bundle and therefore an EH region end must be on a bundle
8762 insn = PREV_INSN (insn);
8763 for (i = 0; i < 3; i++)
8766 insn = next_active_insn (insn);
8767 while (GET_CODE (insn) == INSN
8768 && get_attr_empty (insn) == EMPTY_YES);
8769 if (GET_CODE (insn) == CALL_INSN)
8770 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8775 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8776 || code == CODE_FOR_nop_b);
8777 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8780 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8787 /* The following function does insn bundling. Bundling means
8788 inserting templates and nop insns to fit insn groups into permitted
8789 templates. Instruction scheduling uses an NDFA (non-deterministic
8790 finite automaton) encoding information about the templates and the
8791 inserted nops. Nondeterminism of the automaton permits following
8792 all possible insn sequences very quickly.
8794 Unfortunately it is not possible to get information about inserting
8795 nop insns and used templates from the automata states. The
8796 automata only says that we can issue an insn possibly inserting
8797 some nops before it and using some template. Therefore insn
8798 bundling in this function is implemented by using DFA
8799 (deterministic finite automata). We follow all possible insn
8800 sequences by inserting 0-2 nops (that is what the NDFA describe for
8801 insn scheduling) before/after each insn being bundled. We know the
8802 start of simulated processor cycle from insn scheduling (insn
8803 starting a new cycle has TImode).
8805 Simple implementation of insn bundling would create enormous
8806 number of possible insn sequences satisfying information about new
8807 cycle ticks taken from the insn scheduling. To make the algorithm
8808 practical we use dynamic programming. Each decision (about
8809 inserting nops and implicitly about previous decisions) is described
8810 by structure bundle_state (see above). If we generate the same
8811 bundle state (key is automaton state after issuing the insns and
8812 nops for it), we reuse already generated one. As consequence we
8813 reject some decisions which cannot improve the solution and
8814 reduce memory for the algorithm.
8816 When we reach the end of EBB (extended basic block), we choose the
8817 best sequence and then, moving back in EBB, insert templates for
8818 the best alternative. The templates are taken from querying
8819 automaton state for each insn in chosen bundle states.
8821 So the algorithm makes two (forward and backward) passes through
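/* Illustrative sketch, not part of the original source: the core of the
   dynamic programming described above is keying each decision by the
   automaton state reached after issuing an insn plus its nops, and
   reusing an existing bundle state when the same key has already been
   generated at no greater cost.  The toy structure and merge function
   below are hypothetical simplifications of bundle_state and
   insert_bundle_state.  */
#if 0
struct toy_state
{
  unsigned key;			/* Encodes the DFA automaton state.  */
  int cost;			/* Accumulated cost of the decisions.  */
  struct toy_state *next;	/* Chain of states for one insn index.  */
};

static struct toy_state *
merge_state (struct toy_state **chain, struct toy_state *cand)
{
  struct toy_state *s;

  for (s = *chain; s != NULL; s = s->next)
    if (s->key == cand->key)
      {
	/* An equivalent state already exists: keep whichever decision
	   is cheaper and reject the other one.  */
	if (cand->cost < s->cost)
	  s->cost = cand->cost;
	return s;
      }
  /* First time we see this key: remember the new state.  */
  cand->next = *chain;
  *chain = cand;
  return cand;
}
#endif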
8825 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8827 struct bundle_state *curr_state, *next_state, *best_state;
8828 rtx insn, next_insn;
8830 int i, bundle_end_p, only_bundle_end_p, asm_p;
8831 int pos = 0, max_pos, template0, template1;
8834 enum attr_type type;
8837 /* Count insns in the EBB. */
8838 for (insn = NEXT_INSN (prev_head_insn);
8839 insn && insn != tail;
8840 insn = NEXT_INSN (insn))
8846 dfa_clean_insn_cache ();
8847 initiate_bundle_state_table ();
8848 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8849 /* First (forward) pass -- generation of bundle states. */
8850 curr_state = get_free_bundle_state ();
8851 curr_state->insn = NULL;
8852 curr_state->before_nops_num = 0;
8853 curr_state->after_nops_num = 0;
8854 curr_state->insn_num = 0;
8855 curr_state->cost = 0;
8856 curr_state->accumulated_insns_num = 0;
8857 curr_state->branch_deviation = 0;
8858 curr_state->middle_bundle_stops = 0;
8859 curr_state->next = NULL;
8860 curr_state->originator = NULL;
8861 state_reset (curr_state->dfa_state);
8862 index_to_bundle_states [0] = curr_state;
8864 /* Shift the cycle mark if it is put on an insn which could be ignored.  */
8865 for (insn = NEXT_INSN (prev_head_insn);
8867 insn = NEXT_INSN (insn))
8869 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8870 || GET_CODE (PATTERN (insn)) == USE
8871 || GET_CODE (PATTERN (insn)) == CLOBBER)
8872 && GET_MODE (insn) == TImode)
8874 PUT_MODE (insn, VOIDmode);
8875 for (next_insn = NEXT_INSN (insn);
8877 next_insn = NEXT_INSN (next_insn))
8878 if (INSN_P (next_insn)
8879 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8880 && GET_CODE (PATTERN (next_insn)) != USE
8881 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8882 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8884 PUT_MODE (next_insn, TImode);
8888 /* Forward pass: generation of bundle states. */
8889 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8893 gcc_assert (INSN_P (insn)
8894 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8895 && GET_CODE (PATTERN (insn)) != USE
8896 && GET_CODE (PATTERN (insn)) != CLOBBER);
8897 type = ia64_safe_type (insn);
8898 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8900 index_to_bundle_states [insn_num] = NULL;
8901 for (curr_state = index_to_bundle_states [insn_num - 1];
8903 curr_state = next_state)
8905 pos = curr_state->accumulated_insns_num % 3;
8906 next_state = curr_state->next;
8907 /* We must fill up the current bundle in order to start a
8908 subsequent asm insn in a new bundle.  An asm insn is always
8909 placed in a separate bundle.  */
8911 = (next_insn != NULL_RTX
8912 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8913 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8914 /* We may fill up the current bundle if it is the cycle end
8915 without a group barrier. */
8917 = (only_bundle_end_p || next_insn == NULL_RTX
8918 || (GET_MODE (next_insn) == TImode
8919 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8920 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8922 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8924 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8926 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8929 gcc_assert (index_to_bundle_states [insn_num]);
8930 for (curr_state = index_to_bundle_states [insn_num];
8932 curr_state = curr_state->next)
8933 if (verbose >= 2 && dump)
8935 /* This structure is taken from generated code of the
8936 pipeline hazard recognizer (see file insn-attrtab.c).
8937 Please don't forget to change the structure if a new
8938 automaton is added to the .md file.  */
8941 unsigned short one_automaton_state;
8942 unsigned short oneb_automaton_state;
8943 unsigned short two_automaton_state;
8944 unsigned short twob_automaton_state;
8949 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8950 curr_state->unique_num,
8951 (curr_state->originator == NULL
8952 ? -1 : curr_state->originator->unique_num),
8954 curr_state->before_nops_num, curr_state->after_nops_num,
8955 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8956 curr_state->middle_bundle_stops,
8957 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8962 /* We should find a solution because the 2nd insn scheduling has found one.  */
8964 gcc_assert (index_to_bundle_states [insn_num]);
8965 /* Find a state corresponding to the best insn sequence. */
8967 for (curr_state = index_to_bundle_states [insn_num];
8969 curr_state = curr_state->next)
8970 /* We are only looking at states whose last bundle is fully
8971 filled up.  First we prefer insn sequences with minimal cost,
8972 then with minimal inserted nops, and finally with branch insns
8973 placed in the 3rd slots.  */
8974 if (curr_state->accumulated_insns_num % 3 == 0
8975 && (best_state == NULL || best_state->cost > curr_state->cost
8976 || (best_state->cost == curr_state->cost
8977 && (curr_state->accumulated_insns_num
8978 < best_state->accumulated_insns_num
8979 || (curr_state->accumulated_insns_num
8980 == best_state->accumulated_insns_num
8981 && (curr_state->branch_deviation
8982 < best_state->branch_deviation
8983 || (curr_state->branch_deviation
8984 == best_state->branch_deviation
8985 && curr_state->middle_bundle_stops
8986 < best_state->middle_bundle_stops)))))))
8987 best_state = curr_state;
8988 /* Second (backward) pass: adding nops and templates. */
8989 gcc_assert (best_state);
8990 insn_num = best_state->before_nops_num;
8991 template0 = template1 = -1;
8992 for (curr_state = best_state;
8993 curr_state->originator != NULL;
8994 curr_state = curr_state->originator)
8996 insn = curr_state->insn;
8997 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8998 || asm_noperands (PATTERN (insn)) >= 0);
9000 if (verbose >= 2 && dump)
9004 unsigned short one_automaton_state;
9005 unsigned short oneb_automaton_state;
9006 unsigned short two_automaton_state;
9007 unsigned short twob_automaton_state;
9012 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9013 curr_state->unique_num,
9014 (curr_state->originator == NULL
9015 ? -1 : curr_state->originator->unique_num),
9017 curr_state->before_nops_num, curr_state->after_nops_num,
9018 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9019 curr_state->middle_bundle_stops,
9020 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9023 /* Find the position in the current bundle window.  The window can
9024 contain at most two bundles.  A two-bundle window means that
9025 the processor will make two bundle rotations.  */
9026 max_pos = get_max_pos (curr_state->dfa_state);
9028 /* The following (negative template number) means that the
9029 processor did one bundle rotation. */
9030 || (max_pos == 3 && template0 < 0))
9032 /* We are at the end of the window -- find template(s) for its bundle(s).  */
9036 template0 = get_template (curr_state->dfa_state, 3);
9039 template1 = get_template (curr_state->dfa_state, 3);
9040 template0 = get_template (curr_state->dfa_state, 6);
9043 if (max_pos > 3 && template1 < 0)
9044 /* It may happen when we have a stop inside a bundle.  */
9046 gcc_assert (pos <= 3);
9047 template1 = get_template (curr_state->dfa_state, 3);
9051 /* Emit nops after the current insn. */
9052 for (i = 0; i < curr_state->after_nops_num; i++)
9055 emit_insn_after (nop, insn);
9057 gcc_assert (pos >= 0);
9060 /* We are at the start of a bundle: emit the template
9061 (it should be defined). */
9062 gcc_assert (template0 >= 0);
9063 ia64_add_bundle_selector_before (template0, nop);
9064 /* If we have a two-bundle window, we make one bundle
9065 rotation.  Otherwise template0 will be undefined
9066 (a negative value).  */
9067 template0 = template1;
9071 /* Move the position backward in the window.  A group barrier has
9072 no slot.  An asm insn takes a whole bundle.  */
9073 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9074 && GET_CODE (PATTERN (insn)) != ASM_INPUT
9075 && asm_noperands (PATTERN (insn)) < 0)
9077 /* A long insn takes 2 slots.  */
9078 if (ia64_safe_type (insn) == TYPE_L)
9080 gcc_assert (pos >= 0);
9082 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9083 && GET_CODE (PATTERN (insn)) != ASM_INPUT
9084 && asm_noperands (PATTERN (insn)) < 0)
9086 /* The current insn is at the bundle start: emit the template.  */
9088 gcc_assert (template0 >= 0);
9089 ia64_add_bundle_selector_before (template0, insn);
9090 b = PREV_INSN (insn);
9092 /* See comment above in the analogous place for emitting nops after the insn.  */
9094 template0 = template1;
9097 /* Emit nops before the current insn.  */
9098 for (i = 0; i < curr_state->before_nops_num; i++)
9101 ia64_emit_insn_before (nop, insn);
9102 nop = PREV_INSN (insn);
9105 gcc_assert (pos >= 0);
9108 /* See comment above in the analogous place for emitting nops after the insn.  */
9110 gcc_assert (template0 >= 0);
9111 ia64_add_bundle_selector_before (template0, insn);
9112 b = PREV_INSN (insn);
9114 template0 = template1;
9120 #ifdef ENABLE_CHECKING
9122 /* Assert the correct calculation of middle_bundle_stops.  */
9123 int num = best_state->middle_bundle_stops;
9124 bool start_bundle = true, end_bundle = false;
9126 for (insn = NEXT_INSN (prev_head_insn);
9127 insn && insn != tail;
9128 insn = NEXT_INSN (insn))
9132 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9133 start_bundle = true;
9138 for (next_insn = NEXT_INSN (insn);
9139 next_insn && next_insn != tail;
9140 next_insn = NEXT_INSN (next_insn))
9141 if (INSN_P (next_insn)
9142 && (ia64_safe_itanium_class (next_insn)
9143 != ITANIUM_CLASS_IGNORE
9144 || recog_memoized (next_insn)
9145 == CODE_FOR_bundle_selector)
9146 && GET_CODE (PATTERN (next_insn)) != USE
9147 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9150 end_bundle = next_insn == NULL_RTX
9151 || next_insn == tail
9152 || (INSN_P (next_insn)
9153 && recog_memoized (next_insn)
9154 == CODE_FOR_bundle_selector);
9155 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9156 && !start_bundle && !end_bundle
9158 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9159 && asm_noperands (PATTERN (next_insn)) < 0)
9162 start_bundle = false;
9166 gcc_assert (num == 0);
9170 free (index_to_bundle_states);
9171 finish_bundle_state_table ();
9173 dfa_clean_insn_cache ();
9176 /* The following function is called at the end of scheduling BB or
9177 EBB. After reload, it inserts stop bits and does insn bundling. */
9180 ia64_sched_finish (FILE *dump, int sched_verbose)
9183 fprintf (dump, "// Finishing schedule.\n");
9184 if (!reload_completed)
9186 if (reload_completed)
9188 final_emit_insn_group_barriers (dump);
9189 bundling (dump, sched_verbose, current_sched_info->prev_head,
9190 current_sched_info->next_tail);
9191 if (sched_verbose && dump)
9192 fprintf (dump, "// finishing %d-%d\n",
9193 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9194 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9200 /* The following function inserts stop bits in scheduled BB or EBB. */
9203 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9206 int need_barrier_p = 0;
9207 int seen_good_insn = 0;
9209 init_insn_group_barriers ();
9211 for (insn = NEXT_INSN (current_sched_info->prev_head);
9212 insn != current_sched_info->next_tail;
9213 insn = NEXT_INSN (insn))
9215 if (GET_CODE (insn) == BARRIER)
9217 rtx last = prev_active_insn (insn);
9221 if (GET_CODE (last) == JUMP_INSN
9222 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9223 last = prev_active_insn (last);
9224 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9225 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9227 init_insn_group_barriers ();
9231 else if (NONDEBUG_INSN_P (insn))
9233 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9235 init_insn_group_barriers ();
9239 else if (need_barrier_p || group_barrier_needed (insn)
9240 || (mflag_sched_stop_bits_after_every_cycle
9241 && GET_MODE (insn) == TImode
9244 if (TARGET_EARLY_STOP_BITS)
9249 last != current_sched_info->prev_head;
9250 last = PREV_INSN (last))
9251 if (INSN_P (last) && GET_MODE (last) == TImode
9252 && stops_p [INSN_UID (last)])
9254 if (last == current_sched_info->prev_head)
9256 last = prev_active_insn (last);
9258 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9259 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9261 init_insn_group_barriers ();
9262 for (last = NEXT_INSN (last);
9264 last = NEXT_INSN (last))
9267 group_barrier_needed (last);
9268 if (recog_memoized (last) >= 0
9269 && important_for_bundling_p (last))
9275 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9277 init_insn_group_barriers ();
9280 group_barrier_needed (insn);
9281 if (recog_memoized (insn) >= 0
9282 && important_for_bundling_p (insn))
9285 else if (recog_memoized (insn) >= 0
9286 && important_for_bundling_p (insn))
9288 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9289 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9290 || asm_noperands (PATTERN (insn)) >= 0);
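/* Illustrative note, not part of the original source: each
   insn_group_barrier emitted above is ultimately printed as the
   architectural stop bit ";;" in the assembly output, e.g.

     add r14 = r32, r33
     ;;                        // stop: the next group may read r14
     ld8 r15 = [r14]

   The register numbers are invented for the example.  */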
9297 /* The following function returns the lookahead depth used by the first-cycle multipass DFA insn scheduling.  */
9301 ia64_first_cycle_multipass_dfa_lookahead (void)
9303 return (reload_completed ? 6 : 4);
9306 /* The following function initializes the variable `dfa_pre_cycle_insn'.  */
9309 ia64_init_dfa_pre_cycle_insn (void)
9311 if (temp_dfa_state == NULL)
9313 dfa_state_size = state_size ();
9314 temp_dfa_state = xmalloc (dfa_state_size);
9315 prev_cycle_state = xmalloc (dfa_state_size);
9317 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9318 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9319 recog_memoized (dfa_pre_cycle_insn);
9320 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9321 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9322 recog_memoized (dfa_stop_insn);
9325 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9326 used by the DFA insn scheduler. */
9329 ia64_dfa_pre_cycle_insn (void)
9331 return dfa_pre_cycle_insn;
9334 /* The following function returns TRUE if PRODUCER (of type ilog or
9335 ld) produces an address for CONSUMER (of type st or stf).  */
9338 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9342 gcc_assert (producer && consumer);
9343 dest = ia64_single_set (producer);
9345 reg = SET_DEST (dest);
9347 if (GET_CODE (reg) == SUBREG)
9348 reg = SUBREG_REG (reg);
9349 gcc_assert (GET_CODE (reg) == REG);
9351 dest = ia64_single_set (consumer);
9353 mem = SET_DEST (dest);
9354 gcc_assert (mem && GET_CODE (mem) == MEM);
9355 return reg_mentioned_p (reg, mem);
9358 /* The following function returns TRUE if PRODUCER (of type ilog or
9359 ld) produces an address for CONSUMER (of type ld or fld).  */
9362 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9364 rtx dest, src, reg, mem;
9366 gcc_assert (producer && consumer);
9367 dest = ia64_single_set (producer);
9369 reg = SET_DEST (dest);
9371 if (GET_CODE (reg) == SUBREG)
9372 reg = SUBREG_REG (reg);
9373 gcc_assert (GET_CODE (reg) == REG);
9375 src = ia64_single_set (consumer);
9377 mem = SET_SRC (src);
9380 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9381 mem = XVECEXP (mem, 0, 0);
9382 else if (GET_CODE (mem) == IF_THEN_ELSE)
9383 /* ??? Is this bypass necessary for ld.c? */
9385 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9386 mem = XEXP (mem, 1);
9389 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9390 mem = XEXP (mem, 0);
9392 if (GET_CODE (mem) == UNSPEC)
9394 int c = XINT (mem, 1);
9396 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9397 || c == UNSPEC_LDSA);
9398 mem = XVECEXP (mem, 0, 0);
9401 /* Note that LO_SUM is used for GOT loads. */
9402 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9404 return reg_mentioned_p (reg, mem);
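/* Illustrative example, not part of the original source: the two bypass
   predicates above match producer/consumer pairs of roughly this RTL
   shape (register numbers invented for the example):

     (set (reg:DI r14) (plus:DI (reg:DI r32) (reg:DI r33)))   ; ilog producer
     (set (reg:DI r15) (mem:DI (reg:DI r14)))                 ; ld consumer

   The producer's destination register appearing in the consumer's
   memory address is exactly what reg_mentioned_p checks.  */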
9407 /* The following function returns TRUE if INSN produces an address for a
9408 load/store insn.  We will place such insns into the M slot because it
9409 decreases their latency.  */
9412 ia64_produce_address_p (rtx insn)
9418 /* Emit pseudo-ops for the assembler to describe predicate relations.
9419 At present this assumes that we only consider predicate pairs to
9420 be mutex, and that the assembler can deduce proper values from
9421 straight-line code. */
9424 emit_predicate_relation_info (void)
9428 FOR_EACH_BB_REVERSE (bb)
9431 rtx head = BB_HEAD (bb);
9433 /* We only need such notes at code labels. */
9434 if (GET_CODE (head) != CODE_LABEL)
9436 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9437 head = NEXT_INSN (head);
9439 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9440 grabbing the entire block of predicate registers. */
9441 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9442 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9444 rtx p = gen_rtx_REG (BImode, r);
9445 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9446 if (head == BB_END (bb))
9452 /* Look for conditional calls that do not return, and protect predicate
9453 relations around them. Otherwise the assembler will assume the call
9454 returns, and complain about uses of call-clobbered predicates after the call.  */
9456 FOR_EACH_BB_REVERSE (bb)
9458 rtx insn = BB_HEAD (bb);
9462 if (GET_CODE (insn) == CALL_INSN
9463 && GET_CODE (PATTERN (insn)) == COND_EXEC
9464 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9466 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9467 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9468 if (BB_HEAD (bb) == insn)
9470 if (BB_END (bb) == insn)
9474 if (insn == BB_END (bb))
9476 insn = NEXT_INSN (insn);
9481 /* Perform machine dependent operations on the rtl chain INSNS. */
9486 /* We are freeing block_for_insn in the toplev to keep compatibility
9487 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9488 compute_bb_for_insn ();
9490 /* If optimizing, we'll have split before scheduling. */
9494 if (optimize && flag_schedule_insns_after_reload
9495 && dbg_cnt (ia64_sched2))
9498 timevar_push (TV_SCHED2);
9499 ia64_final_schedule = 1;
9501 /* We can't let modulo-sched prevent us from scheduling any bbs,
9502 since we need the final schedule to produce bundle information. */
9504 bb->flags &= ~BB_DISABLE_SCHEDULE;
9506 initiate_bundle_states ();
9507 ia64_nop = make_insn_raw (gen_nop ());
9508 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9509 recog_memoized (ia64_nop);
9510 clocks_length = get_max_uid () + 1;
9511 stops_p = XCNEWVEC (char, clocks_length);
9513 if (ia64_tune == PROCESSOR_ITANIUM2)
9515 pos_1 = get_cpu_unit_code ("2_1");
9516 pos_2 = get_cpu_unit_code ("2_2");
9517 pos_3 = get_cpu_unit_code ("2_3");
9518 pos_4 = get_cpu_unit_code ("2_4");
9519 pos_5 = get_cpu_unit_code ("2_5");
9520 pos_6 = get_cpu_unit_code ("2_6");
9521 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9522 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9523 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9524 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9525 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9526 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9527 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9528 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9529 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9530 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9531 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9532 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9533 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9534 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9535 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9536 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9537 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9538 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9539 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9540 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9544 pos_1 = get_cpu_unit_code ("1_1");
9545 pos_2 = get_cpu_unit_code ("1_2");
9546 pos_3 = get_cpu_unit_code ("1_3");
9547 pos_4 = get_cpu_unit_code ("1_4");
9548 pos_5 = get_cpu_unit_code ("1_5");
9549 pos_6 = get_cpu_unit_code ("1_6");
9550 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9551 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9552 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9553 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9554 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9555 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9556 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9557 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9558 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9559 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9560 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9561 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9562 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9563 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9564 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9565 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9566 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9567 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9568 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9569 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9572 if (flag_selective_scheduling2
9573 && !maybe_skip_selective_scheduling ())
9574 run_selective_scheduling ();
9578 /* Redo the alignment computation, as it might have gone wrong.  */
9579 compute_alignments ();
9581 /* We cannot reuse this one because it has been corrupted by the evil glat.  */
9583 finish_bundle_states ();
9586 emit_insn_group_barriers (dump_file);
9588 ia64_final_schedule = 0;
9589 timevar_pop (TV_SCHED2);
9592 emit_all_insn_group_barriers (dump_file);
9596 /* A call must not be the last instruction in a function, so that the
9597 return address is still within the function and unwinding works
9598 properly.  Note that IA-64 differs from dwarf2 on this point.  */
9599 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9604 insn = get_last_insn ();
9605 if (! INSN_P (insn))
9606 insn = prev_active_insn (insn);
9609 /* Skip over insns that expand to nothing. */
9610 while (GET_CODE (insn) == INSN
9611 && get_attr_empty (insn) == EMPTY_YES)
9613 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9614 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9616 insn = prev_active_insn (insn);
9618 if (GET_CODE (insn) == CALL_INSN)
9621 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9622 emit_insn (gen_break_f ());
9623 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9628 emit_predicate_relation_info ();
9630 if (flag_var_tracking)
9632 timevar_push (TV_VAR_TRACKING);
9633 variable_tracking_main ();
9634 timevar_pop (TV_VAR_TRACKING);
9636 df_finish_pass (false);
9639 /* Return true if REGNO is used by the epilogue. */
9642 ia64_epilogue_uses (int regno)
9647 /* With a call to a function in another module, we will write a new
9648 value to "gp". After returning from such a call, we need to make
9649 sure the function restores the original gp-value, even if the
9650 function itself does not use the gp anymore. */
9651 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9653 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9654 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9655 /* For functions defined with the syscall_linkage attribute, all
9656 input registers are marked as live at all function exits. This
9657 prevents the register allocator from using the input registers,
9658 which in turn makes it possible to restart a system call after
9659 an interrupt without having to save/restore the input registers.
9660 This also prevents kernel data from leaking to application code. */
9661 return lookup_attribute ("syscall_linkage",
9662 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9665 /* Conditional return patterns can't represent the use of `b0' as
9666 the return address, so we force the value live this way. */
9670 /* Likewise for ar.pfs, which is used by br.ret. */
9678 /* Return true if REGNO is used by the frame unwinder. */
9681 ia64_eh_uses (int regno)
9685 if (! reload_completed)
9691 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9692 if (regno == current_frame_info.r[r]
9693 || regno == emitted_frame_related_regs[r])
9699 /* Return true if this goes in small data/bss. */
9701 /* ??? We could also support our own long data here, generating movl/add/ld8
9702 instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9703 code faster because there is one less load.  This also includes incomplete
9704 types which can't go in sdata/sbss.  */
9707 ia64_in_small_data_p (const_tree exp)
9709 if (TARGET_NO_SDATA)
9712 /* We want to merge strings, so we never consider them small data. */
9713 if (TREE_CODE (exp) == STRING_CST)
9716 /* Functions are never small data. */
9717 if (TREE_CODE (exp) == FUNCTION_DECL)
9720 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9722 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9724 if (strcmp (section, ".sdata") == 0
9725 || strncmp (section, ".sdata.", 7) == 0
9726 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9727 || strcmp (section, ".sbss") == 0
9728 || strncmp (section, ".sbss.", 6) == 0
9729 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9734 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9736 /* If this is an incomplete type with size 0, then we can't put it
9737 in sdata because it might be too big when completed. */
9738 if (size > 0 && size <= ia64_section_threshold)
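/* Illustrative example, not part of the original source: assuming the
   section threshold is at least 4 bytes, a definition such as

     int counter;

   is placed in small data/bss and can be addressed gp-relative with the
   short addl,ld8 sequence mentioned in the ??? note above, instead of a
   full movl address materialization.  */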
9745 /* Output assembly directives for prologue regions. */
9747 /* The current basic block number. */
9749 static bool last_block;
9751 /* True if we need a copy_state command at the start of the next block. */
9753 static bool need_copy_state;
9755 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9756 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9759 /* The function emits unwind directives for the start of an epilogue. */
9762 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9763 bool unwind, bool frame ATTRIBUTE_UNUSED)
9765 /* If this isn't the last block of the function, then we need to label the
9766 current state, and copy it back in at the start of the next block. */
9771 fprintf (asm_out_file, "\t.label_state %d\n",
9772 ++cfun->machine->state_num);
9773 need_copy_state = true;
9777 fprintf (asm_out_file, "\t.restore sp\n");
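/* Illustrative note, not part of the original source: for an epilogue in
   the middle of a function, the code above emits something like

     .label_state 1
     .restore sp

   and ia64_asm_unwind_emit below then restarts the next block with
   ".body" and ".copy_state 1".  */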
9780 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9783 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9784 bool unwind, bool frame)
9786 rtx dest = SET_DEST (pat);
9787 rtx src = SET_SRC (pat);
9789 if (dest == stack_pointer_rtx)
9791 if (GET_CODE (src) == PLUS)
9793 rtx op0 = XEXP (src, 0);
9794 rtx op1 = XEXP (src, 1);
9796 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9798 if (INTVAL (op1) < 0)
9800 gcc_assert (!frame_pointer_needed);
9802 fprintf (asm_out_file,
9803 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9807 process_epilogue (asm_out_file, insn, unwind, frame);
9811 gcc_assert (src == hard_frame_pointer_rtx);
9812 process_epilogue (asm_out_file, insn, unwind, frame);
9815 else if (dest == hard_frame_pointer_rtx)
9817 gcc_assert (src == stack_pointer_rtx);
9818 gcc_assert (frame_pointer_needed);
9821 fprintf (asm_out_file, "\t.vframe r%d\n",
9822 ia64_dbx_register_number (REGNO (dest)));
9828 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9831 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9833 rtx dest = SET_DEST (pat);
9834 rtx src = SET_SRC (pat);
9835 int dest_regno = REGNO (dest);
9840 /* Saving return address pointer. */
9842 fprintf (asm_out_file, "\t.save rp, r%d\n",
9843 ia64_dbx_register_number (dest_regno));
9847 src_regno = REGNO (src);
9852 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9854 fprintf (asm_out_file, "\t.save pr, r%d\n",
9855 ia64_dbx_register_number (dest_regno));
9858 case AR_UNAT_REGNUM:
9859 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9861 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9862 ia64_dbx_register_number (dest_regno));
9866 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9868 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9869 ia64_dbx_register_number (dest_regno));
9873 /* Everything else should indicate being stored to memory. */
9878 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9881 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9883 rtx dest = SET_DEST (pat);
9884 rtx src = SET_SRC (pat);
9885 int src_regno = REGNO (src);
9890 gcc_assert (MEM_P (dest));
9891 if (GET_CODE (XEXP (dest, 0)) == REG)
9893 base = XEXP (dest, 0);
9898 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9899 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9900 base = XEXP (XEXP (dest, 0), 0);
9901 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9904 if (base == hard_frame_pointer_rtx)
9906 saveop = ".savepsp";
9911 gcc_assert (base == stack_pointer_rtx);
9915 src_regno = REGNO (src);
9919 gcc_assert (!current_frame_info.r[reg_save_b0]);
9921 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9926 gcc_assert (!current_frame_info.r[reg_save_pr]);
9928 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9933 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9935 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9940 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9942 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9946 case AR_UNAT_REGNUM:
9947 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9949 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9958 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9959 1 << (src_regno - GR_REG (4)));
9968 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9969 1 << (src_regno - BR_REG (1)));
9977 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9978 1 << (src_regno - FR_REG (2)));
9981 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9982 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9983 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9984 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9986 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9987 1 << (src_regno - FR_REG (12)));
9991 /* ??? For some reason we mark other general registers, even those
9992 we can't represent in the unwind info. Ignore them. */
9997 /* This function looks at a single insn and emits any directives
9998 required to unwind this insn. */
10001 ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
10003 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10004 bool frame = dwarf2out_do_frame ();
10008 if (!unwind && !frame)
10011 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10013 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
10015 /* Restore unwind state from immediately before the epilogue. */
10016 if (need_copy_state)
10020 fprintf (asm_out_file, "\t.body\n");
10021 fprintf (asm_out_file, "\t.copy_state %d\n",
10022 cfun->machine->state_num);
10024 need_copy_state = false;
10028 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
10031 /* Look for the ALLOC insn. */
10032 if (INSN_CODE (insn) == CODE_FOR_alloc)
10034 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10035 int dest_regno = REGNO (dest);
10037 /* If this is the final destination for ar.pfs, then this must
10038 be the alloc in the prologue. */
10039 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10042 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10043 ia64_dbx_register_number (dest_regno));
10047 /* This must be an alloc before a sibcall. We must drop the
10048 old frame info. The easiest way to drop the old frame
10049 info is to ensure we had a ".restore sp" directive
10050 followed by a new prologue. If the procedure doesn't
10051 have a memory-stack frame, we'll issue a dummy ".restore sp" now.  */
10053 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10054 /* If we haven't done process_epilogue () yet, do it now.  */
10055 process_epilogue (asm_out_file, insn, unwind, frame);
10057 fprintf (asm_out_file, "\t.prologue\n");
10062 handled_one = false;
10063 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10064 switch (REG_NOTE_KIND (note))
10066 case REG_CFA_ADJUST_CFA:
10067 pat = XEXP (note, 0);
10069 pat = PATTERN (insn);
10070 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10071 handled_one = true;
10074 case REG_CFA_OFFSET:
10075 pat = XEXP (note, 0);
10077 pat = PATTERN (insn);
10078 process_cfa_offset (asm_out_file, pat, unwind);
10079 handled_one = true;
10082 case REG_CFA_REGISTER:
10083 pat = XEXP (note, 0);
10085 pat = PATTERN (insn);
10086 process_cfa_register (asm_out_file, pat, unwind);
10087 handled_one = true;
10090 case REG_FRAME_RELATED_EXPR:
10091 case REG_CFA_DEF_CFA:
10092 case REG_CFA_EXPRESSION:
10093 case REG_CFA_RESTORE:
10094 case REG_CFA_SET_VDRAP:
10095 /* Not used in the ia64 port. */
10096 gcc_unreachable ();
10099 /* Not a frame-related note. */
10103 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10104 explicit action to take. No guessing required. */
10105 gcc_assert (handled_one);
10108 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10111 ia64_asm_emit_except_personality (rtx personality)
10113 fputs ("\t.personality\t", asm_out_file);
10114 output_addr_const (asm_out_file, personality);
10115 fputc ('\n', asm_out_file);
10118 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10121 ia64_asm_init_sections (void)
10123 exception_section = get_unnamed_section (0, output_section_asm_op,
10127 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10129 static enum unwind_info_type
10130 ia64_debug_unwind_info (void)
10138 IA64_BUILTIN_COPYSIGNQ,
10139 IA64_BUILTIN_FABSQ,
10140 IA64_BUILTIN_FLUSHRS,
10142 IA64_BUILTIN_HUGE_VALQ,
10146 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10149 ia64_init_builtins (void)
10155 /* The __fpreg type. */
10156 fpreg_type = make_node (REAL_TYPE);
10157 TYPE_PRECISION (fpreg_type) = 82;
10158 layout_type (fpreg_type);
10159 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10161 /* The __float80 type. */
10162 float80_type = make_node (REAL_TYPE);
10163 TYPE_PRECISION (float80_type) = 80;
10164 layout_type (float80_type);
10165 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10167 /* The __float128 type. */
10171 tree float128_type = make_node (REAL_TYPE);
10173 TYPE_PRECISION (float128_type) = 128;
10174 layout_type (float128_type);
10175 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10177 /* TFmode support builtins. */
10178 ftype = build_function_type_list (float128_type, NULL_TREE);
10179 decl = add_builtin_function ("__builtin_infq", ftype,
10180 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10182 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10184 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10185 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10187 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10189 ftype = build_function_type_list (float128_type,
10192 decl = add_builtin_function ("__builtin_fabsq", ftype,
10193 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10194 "__fabstf2", NULL_TREE);
10195 TREE_READONLY (decl) = 1;
10196 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10198 ftype = build_function_type_list (float128_type,
10202 decl = add_builtin_function ("__builtin_copysignq", ftype,
10203 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10204 "__copysigntf3", NULL_TREE);
10205 TREE_READONLY (decl) = 1;
10206 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10209 /* Under HPUX, this is a synonym for "long double". */
10210 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10213 /* Fwrite on VMS is non-standard. */
10214 #if TARGET_ABI_OPEN_VMS
10215 vms_patch_builtins ();
10218 #define def_builtin(name, type, code) \
10219 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10222 decl = def_builtin ("__builtin_ia64_bsp",
10223 build_function_type_list (ptr_type_node, NULL_TREE),
10225 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10227 decl = def_builtin ("__builtin_ia64_flushrs",
10228 build_function_type_list (void_type_node, NULL_TREE),
10229 IA64_BUILTIN_FLUSHRS);
10230 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10236 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10237 set_user_assembler_name (decl, "_Isfinite");
10238 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10239 set_user_assembler_name (decl, "_Isfinitef");
10240 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10241 set_user_assembler_name (decl, "_Isfinitef128");
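/* Illustrative usage, not part of the original source: the builtins
   registered above are available to ia64 user code roughly as follows
   (the 'q' literal suffix is assumed to be handled by
   ia64_c_mode_for_suffix below).  */
#if 0
static void
example_uses (void)
{
  void *bsp = __builtin_ia64_bsp ();	   /* Current value of ar.bsp.  */
  __float128 x = __builtin_fabsq (-1.5q);  /* Expands via __fabstf2.  */
  __float128 inf = __builtin_infq ();
  __builtin_ia64_flushrs ();	/* Flush dirty stacked registers.  */
  (void) bsp; (void) x; (void) inf;
}
#endif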
10246 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10247 enum machine_mode mode ATTRIBUTE_UNUSED,
10248 int ignore ATTRIBUTE_UNUSED)
10250 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10251 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10255 case IA64_BUILTIN_BSP:
10256 if (! target || ! register_operand (target, DImode))
10257 target = gen_reg_rtx (DImode);
10258 emit_insn (gen_bsp_value (target));
10259 #ifdef POINTERS_EXTEND_UNSIGNED
10260 target = convert_memory_address (ptr_mode, target);
10264 case IA64_BUILTIN_FLUSHRS:
10265 emit_insn (gen_flushrs ());
10268 case IA64_BUILTIN_INFQ:
10269 case IA64_BUILTIN_HUGE_VALQ:
10271 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10272 REAL_VALUE_TYPE inf;
10276 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10278 tmp = validize_mem (force_const_mem (target_mode, tmp));
10281 target = gen_reg_rtx (target_mode);
10283 emit_move_insn (target, tmp);
10287 case IA64_BUILTIN_FABSQ:
10288 case IA64_BUILTIN_COPYSIGNQ:
10289 return expand_call (exp, target, ignore);
10292 gcc_unreachable ();
10298 /* Return the ia64 builtin for CODE. */
10301 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10303 if (code >= IA64_BUILTIN_max)
10304 return error_mark_node;
10306 return ia64_builtins[code];
10309 /* On HP-UX IA64, aggregate parameters are passed stored in the
10310 most significant bits of the stack slot.  */
10313 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10315 /* Exception to normal case for structures/unions/etc. */
10317 if (type && AGGREGATE_TYPE_P (type)
10318 && int_size_in_bytes (type) < UNITS_PER_WORD)
10321 /* Fall back to the default. */
10322 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10325 /* Emit text to declare externally defined variables and functions, because
10326 the Intel assembler does not support undefined externals. */
10329 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10331 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10332 set in order to avoid putting out names that are never really used.  */
10334 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10336 /* maybe_assemble_visibility will return 1 if the assembler
10337 visibility directive is output. */
10338 int need_visibility = ((*targetm.binds_local_p) (decl)
10339 && maybe_assemble_visibility (decl));
10341 /* GNU as does not need anything here, but the HP linker does
10342 need something for external functions. */
10343 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10344 && TREE_CODE (decl) == FUNCTION_DECL)
10345 (*targetm.asm_out.globalize_decl_name) (file, decl);
10346 else if (need_visibility && !TARGET_GNU_AS)
10347 (*targetm.asm_out.globalize_label) (file, name);
10351 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10352 modes of word_mode and larger. Rename the TFmode libfuncs using the
10353 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10354 backward compatibility. */
10357 ia64_init_libfuncs (void)
10359 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10360 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10361 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10362 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10364 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10365 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10366 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10367 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10368 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10370 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10371 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10372 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10373 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10374 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10375 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10377 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10378 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10379 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10380 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10381 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10383 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10384 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10385 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10386 /* HP-UX 11.23 libc does not have a function for unsigned
10387 SImode-to-TFmode conversion. */
10388 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
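/* Illustrative note, not part of the original source: after the setup
   above, a TFmode addition such as

     __float128 f (__float128 a, __float128 b) { return a + b; }

   is compiled into a call to _U_Qfadd instead of the default __addtf3
   libgcc routine.  */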
10391 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10394 ia64_hpux_init_libfuncs (void)
10396 ia64_init_libfuncs ();
10398 /* The HP SI millicode division and mod functions expect DI arguments.
10399 By turning them off completely we avoid using both libgcc and the
10400 non-standard millicode routines and use the HP DI millicode routines instead.  */
10403 set_optab_libfunc (sdiv_optab, SImode, 0);
10404 set_optab_libfunc (udiv_optab, SImode, 0);
10405 set_optab_libfunc (smod_optab, SImode, 0);
10406 set_optab_libfunc (umod_optab, SImode, 0);
10408 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10409 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10410 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10411 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10413 /* HP-UX libc has TF min/max/abs routines in it. */
10414 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10415 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10416 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10418 /* ia64_expand_compare uses this. */
10419 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10421 /* These should never be used. */
10422 set_optab_libfunc (eq_optab, TFmode, 0);
10423 set_optab_libfunc (ne_optab, TFmode, 0);
10424 set_optab_libfunc (gt_optab, TFmode, 0);
10425 set_optab_libfunc (ge_optab, TFmode, 0);
10426 set_optab_libfunc (lt_optab, TFmode, 0);
10427 set_optab_libfunc (le_optab, TFmode, 0);
10430 /* Rename the division and modulus functions in VMS. */
10433 ia64_vms_init_libfuncs (void)
10435 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10436 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10437 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10438 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10439 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10440 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10441 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10442 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10443 abort_libfunc = init_one_libfunc ("decc$abort");
10444 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10445 #ifdef MEM_LIBFUNCS_INIT
10450 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10451 the HPUX conventions. */
10454 ia64_sysv4_init_libfuncs (void)
10456 ia64_init_libfuncs ();
10458 /* These functions are not part of the HPUX TFmode interface. We
10459 use them instead of _U_Qfcmp, which doesn't work the way we expect.  */
10461 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10462 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10463 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10464 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10465 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10466 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10468 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10469 glibc doesn't have them. */
10475 ia64_soft_fp_init_libfuncs (void)
10480 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10482 return (mode == SImode || mode == DImode);
10485 /* For HPUX, it is illegal to have relocations in shared segments. */
10488 ia64_hpux_reloc_rw_mask (void)
10493 /* For others, relax this so that relocations to local data go in
10494 read-only segments, but we still cannot allow global relocations
10495 in read-only segments. */
10498 ia64_reloc_rw_mask (void)
10500 return flag_pic ? 3 : 2;
10503 /* Return the section to use for X. The only special thing we do here
10504 is to honor small data. */
10507 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10508 unsigned HOST_WIDE_INT align)
10510 if (GET_MODE_SIZE (mode) > 0
10511 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10512 && !TARGET_NO_SDATA)
10513 return sdata_section;
10515 return default_elf_select_rtx_section (mode, x, align);
10518 static unsigned int
10519 ia64_section_type_flags (tree decl, const char *name, int reloc)
10521 unsigned int flags = 0;
10523 if (strcmp (name, ".sdata") == 0
10524 || strncmp (name, ".sdata.", 7) == 0
10525 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10526 || strncmp (name, ".sdata2.", 8) == 0
10527 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10528 || strcmp (name, ".sbss") == 0
10529 || strncmp (name, ".sbss.", 6) == 0
10530 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10531 flags = SECTION_SMALL;
10533 #if TARGET_ABI_OPEN_VMS
10534 if (decl && DECL_ATTRIBUTES (decl)
10535 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10536 flags |= SECTION_VMS_OVERLAY;
10539 flags |= default_section_type_flags (decl, name, reloc);
10543 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10544 structure type and that the address of that type should be passed
10545 in out0, rather than in r8. */
10548 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10550 tree ret_type = TREE_TYPE (fntype);
10552 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10553 as the structure return address parameter, if the return value
10554 type has a non-trivial copy constructor or destructor. It is not
10555 clear if this same convention should be used for other
10556 programming languages. Until G++ 3.4, we incorrectly used r8 for
10557 these return values. */
10558 return (abi_version_at_least (2)
10560 && TYPE_MODE (ret_type) == BLKmode
10561 && TREE_ADDRESSABLE (ret_type)
10562 && strcmp (lang_hooks.name, "GNU C++") == 0);
10565 /* Output the assembler code for a thunk function. THUNK_DECL is the
10566 declaration for the thunk function itself, FUNCTION is the decl for
10567 the target function. DELTA is an immediate constant offset to be
10568 added to THIS. If VCALL_OFFSET is nonzero, the word at
10569 *(*this + vcall_offset) should be added to THIS. */
10572 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10573 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10576 rtx this_rtx, insn, funexp;
10577 unsigned int this_parmno;
10578 unsigned int this_regno;
10581 reload_completed = 1;
10582 epilogue_completed = 1;
10584 /* Set things up as ia64_expand_prologue might. */
10585 last_scratch_gr_reg = 15;
10587 memset (&current_frame_info, 0, sizeof (current_frame_info));
10588 current_frame_info.spill_cfa_off = -16;
10589 current_frame_info.n_input_regs = 1;
10590 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10592 /* Mark the end of the (empty) prologue. */
10593 emit_note (NOTE_INSN_PROLOGUE_END);
10595 /* Figure out whether "this" will be the first parameter (the
10596 typical case) or the second parameter (as happens when the
10597 virtual function returns certain class objects). */
10599 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10601 this_regno = IN_REG (this_parmno);
10602 if (!TARGET_REG_NAMES)
10603 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10605 this_rtx = gen_rtx_REG (Pmode, this_regno);
10607 /* Apply the constant offset, if required. */
10608 delta_rtx = GEN_INT (delta);
10611 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10612 REG_POINTER (tmp) = 1;
10613 if (delta && satisfies_constraint_I (delta_rtx))
10615 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10619 emit_insn (gen_ptr_extend (this_rtx, tmp));
10623 if (!satisfies_constraint_I (delta_rtx))
10625 rtx tmp = gen_rtx_REG (Pmode, 2);
10626 emit_move_insn (tmp, delta_rtx);
10629 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10632 /* Apply the offset from the vtable, if required. */
10635 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10636 rtx tmp = gen_rtx_REG (Pmode, 2);
10640 rtx t = gen_rtx_REG (ptr_mode, 2);
10641 REG_POINTER (t) = 1;
10642 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10643 if (satisfies_constraint_I (vcall_offset_rtx))
10645 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10649 emit_insn (gen_ptr_extend (tmp, t));
10652 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10656 if (!satisfies_constraint_J (vcall_offset_rtx))
10658 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10659 emit_move_insn (tmp2, vcall_offset_rtx);
10660 vcall_offset_rtx = tmp2;
10662 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10666 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10668 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10670 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10673 /* Generate a tail call to the target function. */
10674 if (! TREE_USED (function))
10676 assemble_external (function);
10677 TREE_USED (function) = 1;
10679 funexp = XEXP (DECL_RTL (function), 0);
10680 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10681 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10682 insn = get_last_insn ();
10683 SIBLING_CALL_P (insn) = 1;
10685 /* Code generation for calls relies on splitting. */
10686 reload_completed = 1;
10687 epilogue_completed = 1;
10688 try_split (PATTERN (insn), insn, 0);
10692 /* Run just enough of rest_of_compilation to get the insns emitted.
10693 There's not really enough bulk here to make other passes such as
10694 instruction scheduling worthwhile.  Note that use_thunk calls
10695 assemble_start_function and assemble_end_function. */
10697 insn_locators_alloc ();
10698 emit_all_insn_group_barriers (NULL);
10699 insn = get_insns ();
10700 shorten_branches (insn);
10701 final_start_function (insn, file, 1);
10702 final (insn, file, 1);
10703 final_end_function ();
10705 reload_completed = 0;
10706 epilogue_completed = 0;
10709 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10712 ia64_struct_value_rtx (tree fntype,
10713 int incoming ATTRIBUTE_UNUSED)
10715 if (TARGET_ABI_OPEN_VMS ||
10716 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10718 return gen_rtx_REG (Pmode, GR_REG (8));
10722 ia64_scalar_mode_supported_p (enum machine_mode mode)
10748 ia64_vector_mode_supported_p (enum machine_mode mode)
10765 /* Implement the FUNCTION_PROFILER macro. */
10768 ia64_output_function_profiler (FILE *file, int labelno)
10770 bool indirect_call;
10772 /* If the function needs a static chain and the static chain
10773 register is r15, we use an indirect call so as to bypass
10774 the PLT stub in case the executable is dynamically linked,
10775 because the stub clobbers r15 as per 5.3.6 of the psABI.
10776 We don't need to do that in non-canonical PIC mode.  */
10778 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10780 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10781 indirect_call = true;
10784 indirect_call = false;
10787 fputs ("\t.prologue 4, r40\n", file);
10789 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10790 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10792 if (NO_PROFILE_COUNTERS)
10793 fputs ("\tmov out3 = r0\n", file);
10797 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10799 if (TARGET_AUTO_PIC)
10800 fputs ("\tmovl out3 = @gprel(", file);
10802 fputs ("\taddl out3 = @ltoff(", file);
10803 assemble_name (file, buf);
10804 if (TARGET_AUTO_PIC)
10805 fputs (")\n", file);
10807 fputs ("), r1\n", file);
10811 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10812 fputs ("\t;;\n", file);
10814 fputs ("\t.save rp, r42\n", file);
10815 fputs ("\tmov out2 = b0\n", file);
10817 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10818 fputs ("\t.body\n", file);
10819 fputs ("\tmov out1 = r1\n", file);
10822 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10823 fputs ("\tmov b6 = r16\n", file);
10824 fputs ("\tld8 r1 = [r14]\n", file);
10825 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10828 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10831 static GTY(()) rtx mcount_func_rtx;
10833 gen_mcount_func_rtx (void)
10835 if (!mcount_func_rtx)
10836 mcount_func_rtx = init_one_libfunc ("_mcount");
10837 return mcount_func_rtx;
10841 ia64_profile_hook (int labelno)
10845 if (NO_PROFILE_COUNTERS)
10846 label = const0_rtx;
10850 const char *label_name;
10851 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10852 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
10853 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10854 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10856 ip = gen_reg_rtx (Pmode);
10857 emit_insn (gen_ip_value (ip));
10858 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10860 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10865 /* Return the mangling of TYPE if it is an extended fundamental type. */
10867 static const char *
10868 ia64_mangle_type (const_tree type)
10870 type = TYPE_MAIN_VARIANT (type);
10872 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10873 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10876 /* On HP-UX, "long double" is mangled as "e" so __float128 is mangled as "e".  */
10878 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10880 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10881 an extended mangling. Elsewhere, "e" is available since long
10882 double is 80 bits. */
10883 if (TYPE_MODE (type) == XFmode)
10884 return TARGET_HPUX ? "u9__float80" : "e";
10885 if (TYPE_MODE (type) == RFmode)
10886 return "u7__fpreg";
10890 /* Return the diagnostic message string if conversion from FROMTYPE to
10891 TOTYPE is not allowed, NULL otherwise. */
10892 static const char *
10893 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10895 /* Reject nontrivial conversion to or from __fpreg. */
10896 if (TYPE_MODE (fromtype) == RFmode
10897 && TYPE_MODE (totype) != RFmode
10898 && TYPE_MODE (totype) != VOIDmode)
10899 return N_("invalid conversion from %<__fpreg%>");
10900 if (TYPE_MODE (totype) == RFmode
10901 && TYPE_MODE (fromtype) != RFmode)
10902 return N_("invalid conversion to %<__fpreg%>");
10906 /* Return the diagnostic message string if the unary operation OP is
10907 not permitted on TYPE, NULL otherwise. */
10908 static const char *
10909 ia64_invalid_unary_op (int op, const_tree type)
10911 /* Reject operations on __fpreg other than unary + or &. */
10912 if (TYPE_MODE (type) == RFmode
10913 && op != CONVERT_EXPR
10914 && op != ADDR_EXPR)
10915 return N_("invalid operation on %<__fpreg%>");
10919 /* Return the diagnostic message string if the binary operation OP is
10920 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10921 static const char *
10922 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10924 /* Reject operations on __fpreg. */
10925 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10926 return N_("invalid operation on %<__fpreg%>");
10930 /* HP-UX version_id attribute.
10931 For object foo, if the version_id is set to 1234, put out an alias
10932 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10933 other than an alias statement because it is an illegal symbol name.  */
10936 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10937 tree name ATTRIBUTE_UNUSED,
10939 int flags ATTRIBUTE_UNUSED,
10940 bool *no_add_attrs)
10942 tree arg = TREE_VALUE (args);
10944 if (TREE_CODE (arg) != STRING_CST)
10946 error("version attribute is not a string");
10947 *no_add_attrs = true;
10953 /* Target hook for c_mode_for_suffix. */
10955 static enum machine_mode
10956 ia64_c_mode_for_suffix (char suffix)
10966 static enum machine_mode
10967 ia64_promote_function_mode (const_tree type,
10968 enum machine_mode mode,
10970 const_tree funtype,
10973 /* Special processing required for OpenVMS ... */
10975 if (!TARGET_ABI_OPEN_VMS)
10976 return default_promote_function_mode(type, mode, punsignedp, funtype,
10979 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10980 HP OpenVMS I64 Version 8.2EFT,
10981 chapter 4 "OpenVMS I64 Conventions"
10982 section 4.7 "Procedure Linkage"
10983 subsection 4.7.5.2, "Normal Register Parameters"
10985 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10986 values passed in registers are zero-filled; signed integral values as
10987 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10988 For all other types passed in the general registers, unused bits are undefined."  */
10991 if (for_return != 2
10992 && GET_MODE_CLASS (mode) == MODE_INT
10993 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10995 if (mode == SImode)
11000 return promote_mode (type, mode, punsignedp);
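/* Illustrative sketch, not part of the original source: a hypothetical
   predicate capturing the VMS extension rule quoted above.  */
#if 0
static int
vms_arg_is_sign_extended (enum machine_mode mode, int is_unsigned)
{
  /* Signed integral values, and unsigned 32-bit integral values, are
     sign-extended to 64 bits; other unsigned integral values are
     zero-filled.  */
  return !is_unsigned || mode == SImode;
}
#endif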
11003 static GTY(()) rtx ia64_dconst_0_5_rtx;
11006 ia64_dconst_0_5 (void)
11008 if (! ia64_dconst_0_5_rtx)
11010 REAL_VALUE_TYPE rv;
11011 real_from_string (&rv, "0.5");
11012 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11014 return ia64_dconst_0_5_rtx;
11017 static GTY(()) rtx ia64_dconst_0_375_rtx;
11020 ia64_dconst_0_375 (void)
11022 if (! ia64_dconst_0_375_rtx)
11024 REAL_VALUE_TYPE rv;
11025 real_from_string (&rv, "0.375");
11026 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11028 return ia64_dconst_0_375_rtx;
11031 static enum machine_mode
11032 ia64_get_reg_raw_mode (int regno)
11034 if (FR_REGNO_P (regno))
11036 return default_get_reg_raw_mode(regno);
11039 /* Always default to the .text section until the HP-UX linker is fixed.  */
11041 ATTRIBUTE_UNUSED static section *
11042 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11043 enum node_frequency freq ATTRIBUTE_UNUSED,
11044 bool startup ATTRIBUTE_UNUSED,
11045 bool exit ATTRIBUTE_UNUSED)
11050 #include "gt-ia64.h"