/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tm-constrs.h"
#include "sel-sched.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */
  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static void ia64_option_optimization (int, int);
static void ia64_option_default_params (void);
static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (enum machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (enum machine_mode, reg_class_t,
                                    reg_class_t);
static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
                                  bool);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static void ia64_asm_unwind_emit (FILE *, rtx);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);
static enum unwind_info_type ia64_except_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
                                                     enum machine_mode,
                                                     int *,
                                                     const_tree,
                                                     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);

static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false, ia64_vms_common_object_attribute},
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION ia64_option_optimization
#undef TARGET_OPTION_DEFAULT_PARAMS
#define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
               name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "an address area attribute cannot be specified for "
                    "local variables");
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
                "address area attribute cannot be specified for "
                "functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;
  if (! DECL_P (decl))
    abort ();

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* As common_object attribute set DECL_SECTION_NAME check it before
     looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /*  Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
               size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}
/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
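
/* Worked example of the above (illustrative, not from the original
   sources): for rop = 0xff0 and rshift = 4, shifting out the four zero
   bits leaves op = 0xff; op + 1 == 0x100 and exact_log2 (0x100) == 8,
   so the dep.z field is 8 bits wide.  A non-contiguous mask such as
   0xf0f gives an op + 1 that is not a power of two, and exact_log2
   returns -1, signalling failure.  */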
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
          || GET_MODE (x) == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, GET_MODE (op))
              || function_operand (op, GET_MODE (op)))
            return true;
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      {
        enum machine_mode mode = GET_MODE (x);

        if (mode == V2SFmode)
          return satisfies_constraint_Y (x);

        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
                && GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
        }
    }

  return true;
}
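
/* Worked example of the 14-bit offset split above (illustrative): for
   hi = 0x3000, lo = ((0x3000 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1000 and
   hi becomes 0x3000 - (-0x1000) = 0x4000.  The low part is the offset
   sign-extended from 14 bits, so it always fits an "addl" immediate,
   while the high part is left 16KB-aligned.  */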
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
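
/* For reference (illustrative sketch, not emitted verbatim here): in the
   initial-exec model the expansion above amounts to something like

        addl  r2 = @ltoff(@tprel(x#)), gp
        ld8   r2 = [r2]                 // tprel offset of x
        add   r3 = r2, r13              // r13 is the thread pointer

   while local-exec without TARGET_TLS64 folds the offset into a single
   add via the add_tprel pattern.  */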
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
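
/* For example (illustrative): a TImode load through a plain register
   pointer r14 comes out as

        ld8 r16 = [r14], 8      // low word, r14 += 8
        ld8 r17 = [r14], -8     // high word, r14 -= 8

   with the trailing post-decrement omitted when DEAD says the pointer
   does not survive the second insn.  */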
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */
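
/* A concrete way to arrive at such a construct (illustrative):

     union { long double ld; __int128_t ti; } u;

   writing u.ti and then reading u.ld can leave us holding
   (subreg:XF (reg:TI ...)), which is exactly what the spill below
   is meant to break up.  */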
static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          rtx op1 = operands[1];

          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          else
            op1 = gen_rtx_REG (TImode, REGNO (op1));

          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
          return true;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          /* Don't word-swap when reading in the constant.  */
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
                                           0, mode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
                                           0, mode));
          return true;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
         If op0 is a register, then we spill op1, so that we now have a
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
         to force the spill.  */
      if (register_operand (operands[0], mode))
        {
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
          op1 = gen_rtx_SUBREG (mode, op1, 0);
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
        }
      else
        {
          rtx in[2];

          gcc_assert (GET_CODE (operands[0]) == MEM);

          /* Don't word-swap when writing out the value.  */
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
          return true;
        }
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
        {
          rtx memt, memx, in = operands[1];
          if (CONSTANT_P (in))
            in = validize_mem (force_const_mem (mode, in));
          if (GET_CODE (in) == MEM)
            memt = adjust_address (in, TImode, 0);
          else
            {
              memt = assign_stack_temp (TImode, 16, 0);
              memx = adjust_address (memt, mode, 0);
              emit_move_insn (memx, in);
            }
          emit_move_insn (op0, memt);
          return true;
        }

      if (!ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             an SNaN operand.  */
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
        default: gcc_unreachable ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     *op0, TFmode, *op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
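
/* Worked example (illustrative): a <= b on HP-UX TFmode becomes a call
   _U_Qfcmp (a, b, QCMP_LT|QCMP_EQ|QCMP_INV) whose DImode result is then
   tested with NE against zero; the QCMP_INV bit makes the library raise
   FP_INVALID on a signaling NaN, matching C's relational operators.  */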
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
        {
        case V2SImode:
          {
            rtx t1, t2, mask;

            /* Subtract (-(INT MAX) - 1) from both operands to make
               them signed.  */
            mask = GEN_INT (0x80000000);
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
            mask = force_reg (mode, mask);
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t1, op0, mask));
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t2, op1, mask));
            op0 = t1;
            op1 = t2;
            code = GT;
          }
          break;
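
          /* Worked example of the bias trick (illustrative): comparing
             0 >u 0xffffffff is false; after subtracting 0x80000000
             (mod 2^32) the operands become INT_MIN and INT_MAX, and the
             signed compare INT_MIN > INT_MAX is false as well, so the
             unsigned ordering is preserved exactly.  */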
        case V8QImode:
        case V4HImode:
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, op0, op1)));

          code = EQ;
          op0 = x;
          op1 = CONST0_RTX (mode);
          negate = !negate;
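
          /* E.g. (illustrative): for bytes 5 >u 3, 5 -us 3 = 2 and
             2 == 0 is false, which NEGATE flips to true; for 3 >u 5,
             3 -us 5 saturates to 0 and 0 == 0 negated gives false.  */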
          break;

        default:
          gcc_unreachable ();
        }
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  rtx l, h, x, s;
  enum machine_mode wmode, mode;
  rtx (*unpack_l) (rtx, rtx, rtx);
  rtx (*unpack_h) (rtx, rtx, rtx);
  rtx (*plus) (rtx, rtx, rtx);

  wmode = GET_MODE (operands[0]);
  mode = GET_MODE (operands[1]);

  switch (mode)
    {
    case V8QImode:
      unpack_l = gen_unpack1_l;
      unpack_h = gen_unpack1_h;
      plus = gen_addv4hi3;
      break;
    case V4HImode:
      unpack_l = gen_unpack2_l;
      unpack_h = gen_unpack2_h;
      plus = gen_addv2si3;
      break;
    default:
      gcc_unreachable ();
    }

  /* Fill in x with the sign extension of each element in op1.  */
  if (unsignedp)
    x = CONST0_RTX (mode);
  else
    {
      bool neg;

      x = gen_reg_rtx (mode);

      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
                                        CONST0_RTX (mode));
      gcc_assert (!neg);
    }

  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);
  s = gen_reg_rtx (wmode);

  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
  emit_insn (plus (s, l, operands[2]));
  emit_insn (plus (operands[0], h, s));
}
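
/* E.g. (illustrative): when widening V8QI into V4HI sums, unpack1_l
   interleaves the four low input bytes with X, which holds zero bytes
   for the unsigned case or 0xff mask bytes for negative elements, so
   each 16-bit lane is the zero- or sign-extension of its byte.  */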
1995 /* Emit a signed or unsigned V8QI dot product operation. */
1998 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2000 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
2002 /* Fill in x1 and x2 with the sign extension of each element. */
2004 x1 = x2 = CONST0_RTX (V8QImode);
2009 x1 = gen_reg_rtx (V8QImode);
2010 x2 = gen_reg_rtx (V8QImode);
2012 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
2013 CONST0_RTX (V8QImode));
2015 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
2016 CONST0_RTX (V8QImode));
2020 l1 = gen_reg_rtx (V4HImode);
2021 l2 = gen_reg_rtx (V4HImode);
2022 h1 = gen_reg_rtx (V4HImode);
2023 h2 = gen_reg_rtx (V4HImode);
2025 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
2026 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
2027 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
2028 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
2030 p1 = gen_reg_rtx (V2SImode);
2031 p2 = gen_reg_rtx (V2SImode);
2032 p3 = gen_reg_rtx (V2SImode);
2033 p4 = gen_reg_rtx (V2SImode);
2034 emit_insn (gen_pmpy2_r (p1, l1, l2));
2035 emit_insn (gen_pmpy2_l (p2, l1, l2));
2036 emit_insn (gen_pmpy2_r (p3, h1, h2));
2037 emit_insn (gen_pmpy2_l (p4, h1, h2));
2039 s1 = gen_reg_rtx (V2SImode);
2040 s2 = gen_reg_rtx (V2SImode);
2041 s3 = gen_reg_rtx (V2SImode);
2042 emit_insn (gen_addv2si3 (s1, p1, p2));
2043 emit_insn (gen_addv2si3 (s2, p3, p4));
2044 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2045 emit_insn (gen_addv2si3 (operands[0], s2, s3));
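/* The lane-by-lane distribution of the eight products is not
   important here, only the reduction property; as a sketch, after
   the additions above we have

	operands[0][0] + operands[0][1]
	  == operands[3][0] + operands[3][1]
	     + sum over j in 0..7 of operands[1][j] * operands[2][j]

   which is exactly what a dot-product reduction requires.  */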
2048 /* Emit the appropriate sequence for a call. */
2051 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2056 addr = XEXP (addr, 0);
2057 addr = convert_memory_address (DImode, addr);
2058 b0 = gen_rtx_REG (DImode, R_BR (0));
2060 /* ??? Should do this for functions known to bind local too. */
2061 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2064 insn = gen_sibcall_nogp (addr);
2066 insn = gen_call_nogp (addr, b0);
2068 insn = gen_call_value_nogp (retval, addr, b0);
2069 insn = emit_call_insn (insn);
2074 insn = gen_sibcall_gp (addr);
2076 insn = gen_call_gp (addr, b0);
2078 insn = gen_call_value_gp (retval, addr, b0);
2079 insn = emit_call_insn (insn);
2081 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2085 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2087 if (TARGET_ABI_OPEN_VMS)
2088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2089 gen_rtx_REG (DImode, GR_REG (25)));
2093 reg_emitted (enum ia64_frame_regs r)
2095 if (emitted_frame_related_regs[r] == 0)
2096 emitted_frame_related_regs[r] = current_frame_info.r[r];
2098 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2102 get_reg (enum ia64_frame_regs r)
2105 return current_frame_info.r[r];
2109 is_emitted (int regno)
2113 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2114 if (emitted_frame_related_regs[r] == regno)
2120 ia64_reload_gp (void)
2124 if (current_frame_info.r[reg_save_gp])
2126 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2130 HOST_WIDE_INT offset;
2133 offset = (current_frame_info.spill_cfa_off
2134 + current_frame_info.spill_size);
2135 if (frame_pointer_needed)
2137 tmp = hard_frame_pointer_rtx;
2142 tmp = stack_pointer_rtx;
2143 offset = current_frame_info.total_size - offset;
2146 offset_r = GEN_INT (offset);
2147 if (satisfies_constraint_I (offset_r))
2148 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2151 emit_move_insn (pic_offset_table_rtx, offset_r);
2152 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2153 pic_offset_table_rtx, tmp));
2156 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2159 emit_move_insn (pic_offset_table_rtx, tmp);
2163 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2164 rtx scratch_b, int noreturn_p, int sibcall_p)
2167 bool is_desc = false;
2169 /* If we find we're calling through a register, then we're actually
2170 calling through a descriptor, so load up the values. */
2171 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2176 /* ??? We are currently constrained to *not* use peep2, because
2177 we can legitimately change the global lifetime of the GP
2178 (in the form of killing where previously live). This is
2179 because a call through a descriptor doesn't use the previous
2180 value of the GP, while a direct call does, and we do not
2181 commit to either form until the split here.
2183 That said, this means that we lack precise life info for
2184 whether ADDR is dead after this call. This is not terribly
2185 important, since we can fix things up essentially for free
2186 with the POST_DEC below, but it's nice to not use it when we
2187 can immediately tell it's not necessary. */
2188 addr_dead_p = ((noreturn_p || sibcall_p
2189 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2190 REGNO (addr)))
2191 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2193 /* Load the code address into scratch_b. */
2194 tmp = gen_rtx_POST_INC (Pmode, addr);
2195 tmp = gen_rtx_MEM (Pmode, tmp);
2196 emit_move_insn (scratch_r, tmp);
2197 emit_move_insn (scratch_b, scratch_r);
2199 /* Load the GP address. If ADDR is not dead here, then we must
2200 revert the change made above via the POST_INCREMENT. */
2202 tmp = gen_rtx_POST_DEC (Pmode, addr);
2205 tmp = gen_rtx_MEM (Pmode, tmp);
2206 emit_move_insn (pic_offset_table_rtx, tmp);
2213 insn = gen_sibcall_nogp (addr);
2215 insn = gen_call_value_nogp (retval, addr, retaddr);
2217 insn = gen_call_nogp (addr, retaddr);
2218 emit_call_insn (insn);
2220 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2224 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2226 This differs from the generic code in that we know about the zero-extending
2227 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2228 also know that ld.acq+cmpxchg.rel equals a full barrier.
2230 The loop we want to generate looks like
2232 cmp_reg = mem;
2233 label:
2234 old_reg = cmp_reg;
2235 new_reg = cmp_reg op val;
2236 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2237 if (cmp_reg != old_reg)
2238 goto label;
2240 Note that we only do the plain load from memory once.  Subsequent
2241 iterations use the value loaded by the compare-and-swap pattern.  */
2244 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2245 rtx old_dst, rtx new_dst)
2247 enum machine_mode mode = GET_MODE (mem);
2248 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2249 enum insn_code icode;
2251 /* Special case for using fetchadd. */
2252 if ((mode == SImode || mode == DImode)
2253 && (code == PLUS || code == MINUS)
2254 && fetchadd_operand (val, mode))
2257 val = GEN_INT (-INTVAL (val));
2260 old_dst = gen_reg_rtx (mode);
2262 emit_insn (gen_memory_barrier ());
2265 icode = CODE_FOR_fetchadd_acq_si;
2267 icode = CODE_FOR_fetchadd_acq_di;
2268 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2272 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2273 true, OPTAB_WIDEN);
2274 if (new_reg != new_dst)
2275 emit_move_insn (new_dst, new_reg);
2280 /* Because of the volatile mem read, we get an ld.acq, which is the
2281 front half of the full barrier. The end half is the cmpxchg.rel. */
2282 gcc_assert (MEM_VOLATILE_P (mem));
2284 old_reg = gen_reg_rtx (DImode);
2285 cmp_reg = gen_reg_rtx (DImode);
2286 label = gen_label_rtx ();
2290 val = simplify_gen_subreg (DImode, val, mode, 0);
2291 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2294 emit_move_insn (cmp_reg, mem);
2298 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2299 emit_move_insn (old_reg, cmp_reg);
2300 emit_move_insn (ar_ccv, cmp_reg);
2303 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2308 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2309 true, OPTAB_DIRECT);
2310 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2313 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2314 true, OPTAB_DIRECT);
2317 new_reg = gen_lowpart (mode, new_reg);
2319 emit_move_insn (new_dst, new_reg);
2323 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2324 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2325 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2326 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2331 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2333 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
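/* For reference, a plausible shape for the emitted loop, shown as
   IA-64 assembly (an illustrative sketch; register numbers are
   arbitrary and scheduling is left to later passes):

	ld8.acq	r30 = [r32]		// the one plain load, see above
     .loop:
	mov	r29 = r30		// old_reg
	mov	ar.ccv = r30		// value the cmpxchg must match
	and	r28 = r30, r33		// new_reg = cmp_reg op val
	cmpxchg8.rel r30 = [r32], r28, ar.ccv
	cmp.ne	p6, p0 = r30, r29
   (p6)	br.cond.spnt .loop  */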
2336 /* Begin the assembly file. */
2339 ia64_file_start (void)
2341 /* Variable tracking should be run after all optimizations which change order
2342 of insns. It also needs a valid CFG. This can't be done in
2343 ia64_option_override, because flag_var_tracking is finalized after
2344 that.  */
2345 ia64_flag_var_tracking = flag_var_tracking;
2346 flag_var_tracking = 0;
2348 default_file_start ();
2349 emit_safe_across_calls ();
2353 emit_safe_across_calls (void)
2355 unsigned int rs, re;
2362 while (rs < 64 && call_used_regs[PR_REG (rs)])
2366 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2370 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2374 fputc (',', asm_out_file);
2376 fprintf (asm_out_file, "p%u", rs);
2378 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2382 fputc ('\n', asm_out_file);
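/* With the default IA-64 conventions this typically prints a single
   line such as (illustrative)

	.pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers are preserved
   across calls.  */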
2385 /* Globalize a declaration. */
2388 ia64_globalize_decl_name (FILE * stream, tree decl)
2390 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2391 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2394 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2395 const char *p = TREE_STRING_POINTER (v);
2396 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2398 targetm.asm_out.globalize_label (stream, name);
2399 if (TREE_CODE (decl) == FUNCTION_DECL)
2400 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
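/* Illustrative output for a function foo carrying
   __attribute__((version_id ("1.0"))):

	.alias	foo#, "foo{1.0}"
	.global	foo#
	.type	foo#, @function

   modulo the exact spelling used by the globalize_label hook and the
   type directive on the target at hand.  */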
2403 /* Helper function for ia64_compute_frame_size: find an appropriate general
2404 register to spill some special register to. SPECIAL_SPILL_MASK contains
2405 bits in GR0 to GR31 that have already been allocated by this routine.
2406 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2409 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2413 if (emitted_frame_related_regs[r] != 0)
2415 regno = emitted_frame_related_regs[r];
2416 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2417 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2418 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2419 else if (current_function_is_leaf
2420 && regno >= GR_REG (1) && regno <= GR_REG (31))
2421 current_frame_info.gr_used_mask |= 1 << regno;
2426 /* If this is a leaf function, first try an otherwise unused
2427 call-clobbered register. */
2428 if (current_function_is_leaf)
2430 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2431 if (! df_regs_ever_live_p (regno)
2432 && call_used_regs[regno]
2433 && ! fixed_regs[regno]
2434 && ! global_regs[regno]
2435 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2436 && ! is_emitted (regno))
2438 current_frame_info.gr_used_mask |= 1 << regno;
2445 regno = current_frame_info.n_local_regs;
2446 /* If there is a frame pointer, then we can't use loc79, because
2447 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2448 reg_name switching code in ia64_expand_prologue. */
2449 while (regno < (80 - frame_pointer_needed))
2450 if (! is_emitted (LOC_REG (regno++)))
2452 current_frame_info.n_local_regs = regno;
2453 return LOC_REG (regno - 1);
2457 /* Failed to find a general register to spill to. Must use stack. */
2461 /* In order to make for nice schedules, we try to allocate every temporary
2462 to a different register. We must of course stay away from call-saved,
2463 fixed, and global registers. We must also stay away from registers
2464 allocated in current_frame_info.gr_used_mask, since those include regs
2465 used all through the prologue.
2467 Any register allocated here must be used immediately. The idea is to
2468 aid scheduling, not to solve data flow problems. */
2470 static int last_scratch_gr_reg;
2473 next_scratch_gr_reg (void)
2477 for (i = 0; i < 32; ++i)
2479 regno = (last_scratch_gr_reg + i + 1) & 31;
2480 if (call_used_regs[regno]
2481 && ! fixed_regs[regno]
2482 && ! global_regs[regno]
2483 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2485 last_scratch_gr_reg = regno;
2490 /* There must be _something_ available. */
2494 /* Helper function for ia64_compute_frame_size, called through
2495 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2498 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2500 unsigned int regno = REGNO (reg);
2503 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2504 for (i = 0; i < n; ++i)
2505 current_frame_info.gr_used_mask |= 1 << (regno + i);
2510 /* Returns the number of bytes offset between the frame pointer and the stack
2511 pointer for the current function. SIZE is the number of bytes of space
2512 needed for local variables. */
2515 ia64_compute_frame_size (HOST_WIDE_INT size)
2517 HOST_WIDE_INT total_size;
2518 HOST_WIDE_INT spill_size = 0;
2519 HOST_WIDE_INT extra_spill_size = 0;
2520 HOST_WIDE_INT pretend_args_size;
2523 int spilled_gr_p = 0;
2524 int spilled_fr_p = 0;
2530 if (current_frame_info.initialized)
2533 memset (&current_frame_info, 0, sizeof current_frame_info);
2534 CLEAR_HARD_REG_SET (mask);
2536 /* Don't allocate scratches to the return register. */
2537 diddle_return_value (mark_reg_gr_used_mask, NULL);
2539 /* Don't allocate scratches to the EH scratch registers. */
2540 if (cfun->machine->ia64_eh_epilogue_sp)
2541 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2542 if (cfun->machine->ia64_eh_epilogue_bsp)
2543 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2545 /* Find the size of the register stack frame. We have only 80 local
2546 registers, because we reserve 8 for the inputs and 8 for the
2547 outputs.  */
2549 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2550 since we'll be adjusting that down later. */
2551 regno = LOC_REG (78) + ! frame_pointer_needed;
2552 for (; regno >= LOC_REG (0); regno--)
2553 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2555 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2557 /* For functions marked with the syscall_linkage attribute, we must mark
2558 all eight input registers as in use, so that locals aren't visible to
2559 the caller.  */
2561 if (cfun->machine->n_varargs > 0
2562 || lookup_attribute ("syscall_linkage",
2563 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2564 current_frame_info.n_input_regs = 8;
2567 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2568 if (df_regs_ever_live_p (regno))
2570 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2573 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2574 if (df_regs_ever_live_p (regno))
2576 i = regno - OUT_REG (0) + 1;
2578 #ifndef PROFILE_HOOK
2579 /* When -p profiling, we need one output register for the mcount argument.
2580 Likewise for -a profiling for the bb_init_func argument. For -ax
2581 profiling, we need two output registers for the two bb_init_trace_func
2582 arguments.  */
2586 current_frame_info.n_output_regs = i;
2588 /* ??? No rotating register support yet. */
2589 current_frame_info.n_rotate_regs = 0;
2591 /* Discover which registers need spilling, and how much room that
2592 will take. Begin with floating point and general registers,
2593 which will always wind up on the stack. */
2595 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2596 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2598 SET_HARD_REG_BIT (mask, regno);
2604 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2605 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2607 SET_HARD_REG_BIT (mask, regno);
2613 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2614 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2616 SET_HARD_REG_BIT (mask, regno);
2621 /* Now come all special registers that might get saved in other
2622 general registers. */
2624 if (frame_pointer_needed)
2626 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2627 /* If we did not get a register, then we take LOC79. This is guaranteed
2628 to be free, even if regs_ever_live is already set, because this is
2629 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2630 as we don't count loc79 above. */
2631 if (current_frame_info.r[reg_fp] == 0)
2633 current_frame_info.r[reg_fp] = LOC_REG (79);
2634 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2638 if (! current_function_is_leaf)
2640 /* Emit a save of BR0 if we call other functions. Do this even
2641 if this function doesn't return, as EH depends on this to be
2642 able to unwind the stack. */
2643 SET_HARD_REG_BIT (mask, BR_REG (0));
2645 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2646 if (current_frame_info.r[reg_save_b0] == 0)
2648 extra_spill_size += 8;
2652 /* Similarly for ar.pfs. */
2653 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2654 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2655 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2657 extra_spill_size += 8;
2661 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2662 registers are clobbered, so we fall back to the stack. */
2663 current_frame_info.r[reg_save_gp]
2664 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2665 if (current_frame_info.r[reg_save_gp] == 0)
2667 SET_HARD_REG_BIT (mask, GR_REG (1));
2674 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2676 SET_HARD_REG_BIT (mask, BR_REG (0));
2677 extra_spill_size += 8;
2681 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2683 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2684 current_frame_info.r[reg_save_ar_pfs]
2685 = find_gr_spill (reg_save_ar_pfs, 1);
2686 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2688 extra_spill_size += 8;
2694 /* Unwind descriptor hackery: things are most efficient if we allocate
2695 consecutive GR save registers for RP, PFS, FP in that order. However,
2696 it is absolutely critical that FP get the only hard register that's
2697 guaranteed to be free, so we allocated it first. If all three did
2698 happen to be allocated hard regs, and are consecutive, rearrange them
2699 into the preferred order now.
2701 If we have already emitted code for any of those registers,
2702 then it's already too late to change. */
2703 min_regno = MIN (current_frame_info.r[reg_fp],
2704 MIN (current_frame_info.r[reg_save_b0],
2705 current_frame_info.r[reg_save_ar_pfs]));
2706 max_regno = MAX (current_frame_info.r[reg_fp],
2707 MAX (current_frame_info.r[reg_save_b0],
2708 current_frame_info.r[reg_save_ar_pfs]));
2710 && min_regno + 2 == max_regno
2711 && (current_frame_info.r[reg_fp] == min_regno + 1
2712 || current_frame_info.r[reg_save_b0] == min_regno + 1
2713 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2714 && (emitted_frame_related_regs[reg_save_b0] == 0
2715 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2716 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2717 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2718 && (emitted_frame_related_regs[reg_fp] == 0
2719 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2721 current_frame_info.r[reg_save_b0] = min_regno;
2722 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2723 current_frame_info.r[reg_fp] = min_regno + 2;
2726 /* See if we need to store the predicate register block. */
2727 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2728 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2730 if (regno <= PR_REG (63))
2732 SET_HARD_REG_BIT (mask, PR_REG (0));
2733 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2734 if (current_frame_info.r[reg_save_pr] == 0)
2736 extra_spill_size += 8;
2740 /* ??? Mark them all as used so that register renaming and such
2741 are free to use them. */
2742 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2743 df_set_regs_ever_live (regno, true);
2746 /* If we're forced to use st8.spill, we're forced to save and restore
2747 ar.unat as well. The check for existing liveness allows inline asm
2748 to touch ar.unat. */
2749 if (spilled_gr_p || cfun->machine->n_varargs
2750 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2752 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2753 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2754 current_frame_info.r[reg_save_ar_unat]
2755 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2756 if (current_frame_info.r[reg_save_ar_unat] == 0)
2758 extra_spill_size += 8;
2763 if (df_regs_ever_live_p (AR_LC_REGNUM))
2765 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2766 current_frame_info.r[reg_save_ar_lc]
2767 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2768 if (current_frame_info.r[reg_save_ar_lc] == 0)
2770 extra_spill_size += 8;
2775 /* If we have an odd number of words of pretend arguments written to
2776 the stack, then the FR save area will be unaligned. We round the
2777 size of this area up to keep things 16 byte aligned. */
2779 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2781 pretend_args_size = crtl->args.pretend_args_size;
2783 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2784 + crtl->outgoing_args_size);
2785 total_size = IA64_STACK_ALIGN (total_size);
2787 /* We always use the 16-byte scratch area provided by the caller, but
2788 if we are a leaf function, there's no one to which we need to provide
2789 it.  */
2790 if (current_function_is_leaf)
2791 total_size = MAX (0, total_size - 16);
2793 current_frame_info.total_size = total_size;
2794 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2795 current_frame_info.spill_size = spill_size;
2796 current_frame_info.extra_spill_size = extra_spill_size;
2797 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2798 current_frame_info.n_spilled = n_spilled;
2799 current_frame_info.initialized = reload_completed;
2802 /* Worker function for TARGET_CAN_ELIMINATE. */
2805 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2807 return (to == BR_REG (0) ? current_function_is_leaf : true);
2810 /* Compute the initial difference between the specified pair of registers. */
2813 ia64_initial_elimination_offset (int from, int to)
2815 HOST_WIDE_INT offset;
2817 ia64_compute_frame_size (get_frame_size ());
2820 case FRAME_POINTER_REGNUM:
2823 case HARD_FRAME_POINTER_REGNUM:
2824 if (current_function_is_leaf)
2825 offset = -current_frame_info.total_size;
2827 offset = -(current_frame_info.total_size
2828 - crtl->outgoing_args_size - 16);
2831 case STACK_POINTER_REGNUM:
2832 if (current_function_is_leaf)
2835 offset = 16 + crtl->outgoing_args_size;
2843 case ARG_POINTER_REGNUM:
2844 /* Arguments start above the 16 byte save area, unless stdarg,
2845 in which case we store through the 16 byte save area.  */
2848 case HARD_FRAME_POINTER_REGNUM:
2849 offset = 16 - crtl->args.pretend_args_size;
2852 case STACK_POINTER_REGNUM:
2853 offset = (current_frame_info.total_size
2854 + 16 - crtl->args.pretend_args_size);
2869 /* If there are more than a trivial number of register spills, we use
2870 two interleaved iterators so that we can get two memory references
2871 per insn group.
2873 In order to simplify things in the prologue and epilogue expanders,
2874 we use helper functions to fix up the memory references after the
2875 fact with the appropriate offsets to a POST_MODIFY memory mode.
2876 The following data structure tracks the state of the two iterators
2877 while insns are being emitted. */
2879 struct spill_fill_data
2881 rtx init_after; /* point at which to emit initializations */
2882 rtx init_reg[2]; /* initial base register */
2883 rtx iter_reg[2]; /* the iterator registers */
2884 rtx *prev_addr[2]; /* address of last memory use */
2885 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2886 HOST_WIDE_INT prev_off[2]; /* last offset */
2887 int n_iter; /* number of iterators in use */
2888 int next_iter; /* next iterator to use */
2889 unsigned int save_gr_used_mask;
2892 static struct spill_fill_data spill_fill_data;
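/* To illustrate the interleaving (a sketch, not actual output):
   spilling four GRs with iterator registers r2 and r3 could come
   out as

	st8	[r2] = r34, 16		// iterator 0, post-increment
	st8	[r3] = r35, 16		// iterator 1
	st8	[r2] = r36, 16		// iterator 0 again
	st8	[r3] = r37

   giving two independent address streams per insn group.  */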
2895 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2899 spill_fill_data.init_after = get_last_insn ();
2900 spill_fill_data.init_reg[0] = init_reg;
2901 spill_fill_data.init_reg[1] = init_reg;
2902 spill_fill_data.prev_addr[0] = NULL;
2903 spill_fill_data.prev_addr[1] = NULL;
2904 spill_fill_data.prev_insn[0] = NULL;
2905 spill_fill_data.prev_insn[1] = NULL;
2906 spill_fill_data.prev_off[0] = cfa_off;
2907 spill_fill_data.prev_off[1] = cfa_off;
2908 spill_fill_data.next_iter = 0;
2909 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2911 spill_fill_data.n_iter = 1 + (n_spills > 2);
2912 for (i = 0; i < spill_fill_data.n_iter; ++i)
2914 int regno = next_scratch_gr_reg ();
2915 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2916 current_frame_info.gr_used_mask |= 1 << regno;
2921 finish_spill_pointers (void)
2923 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2927 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2929 int iter = spill_fill_data.next_iter;
2930 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2931 rtx disp_rtx = GEN_INT (disp);
2934 if (spill_fill_data.prev_addr[iter])
2936 if (satisfies_constraint_N (disp_rtx))
2938 *spill_fill_data.prev_addr[iter]
2939 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2940 gen_rtx_PLUS (DImode,
2941 spill_fill_data.iter_reg[iter],
2943 add_reg_note (spill_fill_data.prev_insn[iter],
2944 REG_INC, spill_fill_data.iter_reg[iter]);
2948 /* ??? Could use register post_modify for loads. */
2949 if (!satisfies_constraint_I (disp_rtx))
2951 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2952 emit_move_insn (tmp, disp_rtx);
2955 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2956 spill_fill_data.iter_reg[iter], disp_rtx));
2959 /* Micro-optimization: if we've created a frame pointer, it's at
2960 CFA 0, which may allow the real iterator to be initialized lower,
2961 slightly increasing parallelism. Also, if there are few saves
2962 it may eliminate the iterator entirely. */
2964 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2965 && frame_pointer_needed)
2967 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2968 set_mem_alias_set (mem, get_varargs_alias_set ());
2976 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2977 spill_fill_data.init_reg[iter]);
2982 if (!satisfies_constraint_I (disp_rtx))
2984 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2985 emit_move_insn (tmp, disp_rtx);
2989 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2990 spill_fill_data.init_reg[iter],
2997 /* Careful for being the first insn in a sequence. */
2998 if (spill_fill_data.init_after)
2999 insn = emit_insn_after (seq, spill_fill_data.init_after);
3002 rtx first = get_insns ();
3004 insn = emit_insn_before (seq, first);
3006 insn = emit_insn (seq);
3008 spill_fill_data.init_after = insn;
3011 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3013 /* ??? Not all of the spills are for varargs, but some of them are.
3014 The rest of the spills belong in an alias set of their own. But
3015 it doesn't actually hurt to include them here. */
3016 set_mem_alias_set (mem, get_varargs_alias_set ());
3018 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3019 spill_fill_data.prev_off[iter] = cfa_off;
3021 if (++iter >= spill_fill_data.n_iter)
3023 spill_fill_data.next_iter = iter;
3029 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3032 int iter = spill_fill_data.next_iter;
3035 mem = spill_restore_mem (reg, cfa_off);
3036 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3037 spill_fill_data.prev_insn[iter] = insn;
3044 RTX_FRAME_RELATED_P (insn) = 1;
3046 /* Don't even pretend that the unwind code can intuit its way
3047 through a pair of interleaved post_modify iterators. Just
3048 provide the correct answer. */
3050 if (frame_pointer_needed)
3052 base = hard_frame_pointer_rtx;
3057 base = stack_pointer_rtx;
3058 off = current_frame_info.total_size - cfa_off;
3061 add_reg_note (insn, REG_CFA_OFFSET,
3062 gen_rtx_SET (VOIDmode,
3063 gen_rtx_MEM (GET_MODE (reg),
3064 plus_constant (base, off)),
3070 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3072 int iter = spill_fill_data.next_iter;
3075 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3076 GEN_INT (cfa_off)));
3077 spill_fill_data.prev_insn[iter] = insn;
3080 /* Wrapper functions that discard the CONST_INT spill offset.  These
3081 exist so that we can give gr_spill/gr_fill the offset they need and
3082 use a consistent function interface. */
3085 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3087 return gen_movdi (dest, src);
3091 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3093 return gen_fr_spill (dest, src);
3097 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3099 return gen_fr_restore (dest, src);
3102 /* Called after register allocation to add any instructions needed for the
3103 prologue. Using a prologue insn is favored compared to putting all of the
3104 instructions in output_function_prologue(), since it allows the scheduler
3105 to intermix instructions with the saves of the caller saved registers. In
3106 some cases, it might be necessary to emit a barrier instruction as the last
3107 insn to prevent such scheduling.
3109 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3110 so that the debug info generation code can handle them properly.
3112 The register save area is laid out like so:
3114 [ varargs spill area ]
3115 [ fr register spill area ]
3116 [ br register spill area ]
3117 [ ar register spill area ]
3118 [ pr register spill area ]
3119 [ gr register spill area ] */
3121 /* ??? Get inefficient code when the frame size is larger than can fit in an
3122 adds instruction. */
3125 ia64_expand_prologue (void)
3127 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3128 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3131 ia64_compute_frame_size (get_frame_size ());
3132 last_scratch_gr_reg = 15;
3134 if (flag_stack_usage)
3135 current_function_static_stack_size = current_frame_info.total_size;
3139 fprintf (dump_file, "ia64 frame related registers "
3140 "recorded in current_frame_info.r[]:\n");
3141 #define PRINTREG(a) if (current_frame_info.r[a]) \
3142 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3144 PRINTREG(reg_save_b0);
3145 PRINTREG(reg_save_pr);
3146 PRINTREG(reg_save_ar_pfs);
3147 PRINTREG(reg_save_ar_unat);
3148 PRINTREG(reg_save_ar_lc);
3149 PRINTREG(reg_save_gp);
3153 /* If there is no epilogue, then we don't need some prologue insns.
3154 We need to avoid emitting the dead prologue insns, because flow
3155 will complain about them. */
3161 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3162 if ((e->flags & EDGE_FAKE) == 0
3163 && (e->flags & EDGE_FALLTHRU) != 0)
3165 epilogue_p = (e != NULL);
3170 /* Set the local, input, and output register names. We need to do this
3171 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3172 half. If we use in/loc/out register names, then we get assembler errors
3173 in crtn.S because there is no alloc insn or regstk directive in there. */
3174 if (! TARGET_REG_NAMES)
3176 int inputs = current_frame_info.n_input_regs;
3177 int locals = current_frame_info.n_local_regs;
3178 int outputs = current_frame_info.n_output_regs;
3180 for (i = 0; i < inputs; i++)
3181 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3182 for (i = 0; i < locals; i++)
3183 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3184 for (i = 0; i < outputs; i++)
3185 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3188 /* Set the frame pointer register name. The regnum is logically loc79,
3189 but of course we'll not have allocated that many locals. Rather than
3190 worrying about renumbering the existing rtxs, we adjust the name. */
3191 /* ??? This code means that we can never use one local register when
3192 there is a frame pointer. loc79 gets wasted in this case, as it is
3193 renamed to a register that will never be used. See also the try_locals
3194 code in find_gr_spill. */
3195 if (current_frame_info.r[reg_fp])
3197 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3198 reg_names[HARD_FRAME_POINTER_REGNUM]
3199 = reg_names[current_frame_info.r[reg_fp]];
3200 reg_names[current_frame_info.r[reg_fp]] = tmp;
3203 /* We don't need an alloc instruction if we've used no outputs or locals. */
3204 if (current_frame_info.n_local_regs == 0
3205 && current_frame_info.n_output_regs == 0
3206 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3207 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3209 /* If there is no alloc, but there are input registers used, then we
3210 need a .regstk directive. */
3211 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3212 ar_pfs_save_reg = NULL_RTX;
3216 current_frame_info.need_regstk = 0;
3218 if (current_frame_info.r[reg_save_ar_pfs])
3220 regno = current_frame_info.r[reg_save_ar_pfs];
3221 reg_emitted (reg_save_ar_pfs);
3224 regno = next_scratch_gr_reg ();
3225 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3227 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3228 GEN_INT (current_frame_info.n_input_regs),
3229 GEN_INT (current_frame_info.n_local_regs),
3230 GEN_INT (current_frame_info.n_output_regs),
3231 GEN_INT (current_frame_info.n_rotate_regs)));
3232 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3235 /* Set up frame pointer, stack pointer, and spill iterators. */
3237 n_varargs = cfun->machine->n_varargs;
3238 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3239 stack_pointer_rtx, 0);
3241 if (frame_pointer_needed)
3243 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3244 RTX_FRAME_RELATED_P (insn) = 1;
3246 /* Force the unwind info to recognize this as defining a new CFA,
3247 rather than some temp register setup. */
3248 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3251 if (current_frame_info.total_size != 0)
3253 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3256 if (satisfies_constraint_I (frame_size_rtx))
3257 offset = frame_size_rtx;
3260 regno = next_scratch_gr_reg ();
3261 offset = gen_rtx_REG (DImode, regno);
3262 emit_move_insn (offset, frame_size_rtx);
3265 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3266 stack_pointer_rtx, offset));
3268 if (! frame_pointer_needed)
3270 RTX_FRAME_RELATED_P (insn) = 1;
3271 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3272 gen_rtx_SET (VOIDmode,
3274 gen_rtx_PLUS (DImode,
3279 /* ??? At this point we must generate a magic insn that appears to
3280 modify the stack pointer, the frame pointer, and all spill
3281 iterators. This would allow the most scheduling freedom. For
3282 now, just hard stop. */
3283 emit_insn (gen_blockage ());
3286 /* Must copy out ar.unat before doing any integer spills. */
3287 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3289 if (current_frame_info.r[reg_save_ar_unat])
3292 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3293 reg_emitted (reg_save_ar_unat);
3297 alt_regno = next_scratch_gr_reg ();
3298 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3299 current_frame_info.gr_used_mask |= 1 << alt_regno;
3302 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3303 insn = emit_move_insn (ar_unat_save_reg, reg);
3304 if (current_frame_info.r[reg_save_ar_unat])
3306 RTX_FRAME_RELATED_P (insn) = 1;
3307 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3310 /* Even if we're not going to generate an epilogue, we still
3311 need to save the register so that EH works. */
3312 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3313 emit_insn (gen_prologue_use (ar_unat_save_reg));
3316 ar_unat_save_reg = NULL_RTX;
3318 /* Spill all varargs registers. Do this before spilling any GR registers,
3319 since we want the UNAT bits for the GR registers to override the UNAT
3320 bits from varargs, which we don't care about. */
3323 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3325 reg = gen_rtx_REG (DImode, regno);
3326 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3329 /* Locate the bottom of the register save area. */
3330 cfa_off = (current_frame_info.spill_cfa_off
3331 + current_frame_info.spill_size
3332 + current_frame_info.extra_spill_size);
3334 /* Save the predicate register block either in a register or in memory. */
3335 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3337 reg = gen_rtx_REG (DImode, PR_REG (0));
3338 if (current_frame_info.r[reg_save_pr] != 0)
3340 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3341 reg_emitted (reg_save_pr);
3342 insn = emit_move_insn (alt_reg, reg);
3344 /* ??? Denote pr spill/fill by a DImode move that modifies all
3345 64 hard registers. */
3346 RTX_FRAME_RELATED_P (insn) = 1;
3347 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3349 /* Even if we're not going to generate an epilogue, we still
3350 need to save the register so that EH works. */
3352 emit_insn (gen_prologue_use (alt_reg));
3356 alt_regno = next_scratch_gr_reg ();
3357 alt_reg = gen_rtx_REG (DImode, alt_regno);
3358 insn = emit_move_insn (alt_reg, reg);
3359 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3364 /* Handle AR regs in numerical order. All of them get special handling. */
3365 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3366 && current_frame_info.r[reg_save_ar_unat] == 0)
3368 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3369 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3373 /* The alloc insn already copied ar.pfs into a general register. The
3374 only thing we have to do now is copy that register to a stack slot
3375 if we'd not allocated a local register for the job. */
3376 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3377 && current_frame_info.r[reg_save_ar_pfs] == 0)
3379 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3380 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3384 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3386 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3387 if (current_frame_info.r[reg_save_ar_lc] != 0)
3389 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3390 reg_emitted (reg_save_ar_lc);
3391 insn = emit_move_insn (alt_reg, reg);
3392 RTX_FRAME_RELATED_P (insn) = 1;
3393 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3395 /* Even if we're not going to generate an epilogue, we still
3396 need to save the register so that EH works. */
3398 emit_insn (gen_prologue_use (alt_reg));
3402 alt_regno = next_scratch_gr_reg ();
3403 alt_reg = gen_rtx_REG (DImode, alt_regno);
3404 emit_move_insn (alt_reg, reg);
3405 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3410 /* Save the return pointer. */
3411 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3413 reg = gen_rtx_REG (DImode, BR_REG (0));
3414 if (current_frame_info.r[reg_save_b0] != 0)
3416 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3417 reg_emitted (reg_save_b0);
3418 insn = emit_move_insn (alt_reg, reg);
3419 RTX_FRAME_RELATED_P (insn) = 1;
3420 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3422 /* Even if we're not going to generate an epilogue, we still
3423 need to save the register so that EH works. */
3425 emit_insn (gen_prologue_use (alt_reg));
3429 alt_regno = next_scratch_gr_reg ();
3430 alt_reg = gen_rtx_REG (DImode, alt_regno);
3431 emit_move_insn (alt_reg, reg);
3432 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3437 if (current_frame_info.r[reg_save_gp])
3439 reg_emitted (reg_save_gp);
3440 insn = emit_move_insn (gen_rtx_REG (DImode,
3441 current_frame_info.r[reg_save_gp]),
3442 pic_offset_table_rtx);
3445 /* We should now be at the base of the gr/br/fr spill area. */
3446 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3447 + current_frame_info.spill_size));
3449 /* Spill all general registers. */
3450 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3451 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3453 reg = gen_rtx_REG (DImode, regno);
3454 do_spill (gen_gr_spill, reg, cfa_off, reg);
3458 /* Spill the rest of the BR registers. */
3459 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3460 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3462 alt_regno = next_scratch_gr_reg ();
3463 alt_reg = gen_rtx_REG (DImode, alt_regno);
3464 reg = gen_rtx_REG (DImode, regno);
3465 emit_move_insn (alt_reg, reg);
3466 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3470 /* Align the frame and spill all FR registers. */
3471 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3472 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3474 gcc_assert (!(cfa_off & 15));
3475 reg = gen_rtx_REG (XFmode, regno);
3476 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3480 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3482 finish_spill_pointers ();
3485 /* Output the textual info surrounding the prologue. */
3488 ia64_start_function (FILE *file, const char *fnname,
3489 tree decl ATTRIBUTE_UNUSED)
3491 #if VMS_DEBUGGING_INFO
3493 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3495 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3496 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3497 dwarf2out_vms_debug_main_pointer ();
3502 fputs ("\t.proc ", file);
3503 assemble_name (file, fnname);
3505 ASM_OUTPUT_LABEL (file, fnname);
3508 /* Called after register allocation to add any instructions needed for the
3509 epilogue.  Using an epilogue insn is favored compared to putting all of the
3510 instructions in output_function_epilogue(), since it allows the scheduler
3511 to intermix instructions with the saves of the caller saved registers. In
3512 some cases, it might be necessary to emit a barrier instruction as the last
3513 insn to prevent such scheduling. */
3516 ia64_expand_epilogue (int sibcall_p)
3518 rtx insn, reg, alt_reg, ar_unat_save_reg;
3519 int regno, alt_regno, cfa_off;
3521 ia64_compute_frame_size (get_frame_size ());
3523 /* If there is a frame pointer, then we use it instead of the stack
3524 pointer, so that the stack pointer does not need to be valid when
3525 the epilogue starts. See EXIT_IGNORE_STACK. */
3526 if (frame_pointer_needed)
3527 setup_spill_pointers (current_frame_info.n_spilled,
3528 hard_frame_pointer_rtx, 0);
3530 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3531 current_frame_info.total_size);
3533 if (current_frame_info.total_size != 0)
3535 /* ??? At this point we must generate a magic insn that appears to
3536 modify the spill iterators and the frame pointer. This would
3537 allow the most scheduling freedom. For now, just hard stop. */
3538 emit_insn (gen_blockage ());
3541 /* Locate the bottom of the register save area. */
3542 cfa_off = (current_frame_info.spill_cfa_off
3543 + current_frame_info.spill_size
3544 + current_frame_info.extra_spill_size);
3546 /* Restore the predicate registers. */
3547 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3549 if (current_frame_info.r[reg_save_pr] != 0)
3551 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3552 reg_emitted (reg_save_pr);
3556 alt_regno = next_scratch_gr_reg ();
3557 alt_reg = gen_rtx_REG (DImode, alt_regno);
3558 do_restore (gen_movdi_x, alt_reg, cfa_off);
3561 reg = gen_rtx_REG (DImode, PR_REG (0));
3562 emit_move_insn (reg, alt_reg);
3565 /* Restore the application registers. */
3567 /* Load the saved unat from the stack, but do not restore it until
3568 after the GRs have been restored. */
3569 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3571 if (current_frame_info.r[reg_save_ar_unat] != 0)
3574 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3575 reg_emitted (reg_save_ar_unat);
3579 alt_regno = next_scratch_gr_reg ();
3580 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3581 current_frame_info.gr_used_mask |= 1 << alt_regno;
3582 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3587 ar_unat_save_reg = NULL_RTX;
3589 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3591 reg_emitted (reg_save_ar_pfs);
3592 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3593 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3594 emit_move_insn (reg, alt_reg);
3596 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3598 alt_regno = next_scratch_gr_reg ();
3599 alt_reg = gen_rtx_REG (DImode, alt_regno);
3600 do_restore (gen_movdi_x, alt_reg, cfa_off);
3602 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3603 emit_move_insn (reg, alt_reg);
3606 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3608 if (current_frame_info.r[reg_save_ar_lc] != 0)
3610 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3611 reg_emitted (reg_save_ar_lc);
3615 alt_regno = next_scratch_gr_reg ();
3616 alt_reg = gen_rtx_REG (DImode, alt_regno);
3617 do_restore (gen_movdi_x, alt_reg, cfa_off);
3620 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3621 emit_move_insn (reg, alt_reg);
3624 /* Restore the return pointer. */
3625 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3627 if (current_frame_info.r[reg_save_b0] != 0)
3629 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3630 reg_emitted (reg_save_b0);
3634 alt_regno = next_scratch_gr_reg ();
3635 alt_reg = gen_rtx_REG (DImode, alt_regno);
3636 do_restore (gen_movdi_x, alt_reg, cfa_off);
3639 reg = gen_rtx_REG (DImode, BR_REG (0));
3640 emit_move_insn (reg, alt_reg);
3643 /* We should now be at the base of the gr/br/fr spill area. */
3644 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3645 + current_frame_info.spill_size));
3647 /* The GP may be stored on the stack in the prologue, but it's
3648 never restored in the epilogue. Skip the stack slot. */
3649 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3652 /* Restore all general registers. */
3653 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3654 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3656 reg = gen_rtx_REG (DImode, regno);
3657 do_restore (gen_gr_restore, reg, cfa_off);
3661 /* Restore the branch registers. */
3662 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3663 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3665 alt_regno = next_scratch_gr_reg ();
3666 alt_reg = gen_rtx_REG (DImode, alt_regno);
3667 do_restore (gen_movdi_x, alt_reg, cfa_off);
3669 reg = gen_rtx_REG (DImode, regno);
3670 emit_move_insn (reg, alt_reg);
3673 /* Restore floating point registers. */
3674 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3675 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3677 gcc_assert (!(cfa_off & 15));
3678 reg = gen_rtx_REG (XFmode, regno);
3679 do_restore (gen_fr_restore_x, reg, cfa_off);
3683 /* Restore ar.unat for real. */
3684 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3686 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3687 emit_move_insn (reg, ar_unat_save_reg);
3690 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3692 finish_spill_pointers ();
3694 if (current_frame_info.total_size
3695 || cfun->machine->ia64_eh_epilogue_sp
3696 || frame_pointer_needed)
3698 /* ??? At this point we must generate a magic insn that appears to
3699 modify the spill iterators, the stack pointer, and the frame
3700 pointer.  This would allow the most scheduling freedom.  For now,
3701 just hard stop.  */
3702 emit_insn (gen_blockage ());
3705 if (cfun->machine->ia64_eh_epilogue_sp)
3706 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3707 else if (frame_pointer_needed)
3709 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3710 RTX_FRAME_RELATED_P (insn) = 1;
3711 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3713 else if (current_frame_info.total_size)
3715 rtx offset, frame_size_rtx;
3717 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3718 if (satisfies_constraint_I (frame_size_rtx))
3719 offset = frame_size_rtx;
3722 regno = next_scratch_gr_reg ();
3723 offset = gen_rtx_REG (DImode, regno);
3724 emit_move_insn (offset, frame_size_rtx);
3727 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3730 RTX_FRAME_RELATED_P (insn) = 1;
3731 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3732 gen_rtx_SET (VOIDmode,
3734 gen_rtx_PLUS (DImode,
3739 if (cfun->machine->ia64_eh_epilogue_bsp)
3740 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3743 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3746 int fp = GR_REG (2);
3747 /* We need a throwaway register here; r0 and r1 are reserved,
3748 so r2 is the first available call-clobbered register.  If
3749 there was a frame_pointer register, we may have swapped the
3750 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3751 sure we're using the string "r2" when emitting the register
3752 name for the assembler. */
3753 if (current_frame_info.r[reg_fp]
3754 && current_frame_info.r[reg_fp] == GR_REG (2))
3755 fp = HARD_FRAME_POINTER_REGNUM;
3757 /* We must emit an alloc to force the input registers to become output
3758 registers. Otherwise, if the callee tries to pass its parameters
3759 through to another call without an intervening alloc, then these
3760 values may be clobbered.  */
3761 /* ??? We don't need to preserve all input registers. We only need to
3762 preserve those input registers used as arguments to the sibling call.
3763 It is unclear how to compute that number here. */
3764 if (current_frame_info.n_input_regs != 0)
3766 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3767 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3768 const0_rtx, const0_rtx,
3769 n_inputs, const0_rtx));
3770 RTX_FRAME_RELATED_P (insn) = 1;
3775 /* Return 1 if br.ret can do all the work required to return from a
3776 function.  */
3779 ia64_direct_return (void)
3781 if (reload_completed && ! frame_pointer_needed)
3783 ia64_compute_frame_size (get_frame_size ());
3785 return (current_frame_info.total_size == 0
3786 && current_frame_info.n_spilled == 0
3787 && current_frame_info.r[reg_save_b0] == 0
3788 && current_frame_info.r[reg_save_pr] == 0
3789 && current_frame_info.r[reg_save_ar_pfs] == 0
3790 && current_frame_info.r[reg_save_ar_unat] == 0
3791 && current_frame_info.r[reg_save_ar_lc] == 0);
3796 /* Return the magic cookie that we use to hold the return address
3797 during early compilation. */
3800 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3804 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3807 /* Split this value after reload, now that we know where the return
3808 address is saved. */
3811 ia64_split_return_addr_rtx (rtx dest)
3815 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3817 if (current_frame_info.r[reg_save_b0] != 0)
3819 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3820 reg_emitted (reg_save_b0);
3828 /* Compute offset from CFA for BR0. */
3829 /* ??? Must be kept in sync with ia64_expand_prologue. */
3830 off = (current_frame_info.spill_cfa_off
3831 + current_frame_info.spill_size);
3832 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3833 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3836 /* Convert CFA offset to a register based offset. */
3837 if (frame_pointer_needed)
3838 src = hard_frame_pointer_rtx;
3841 src = stack_pointer_rtx;
3842 off += current_frame_info.total_size;
3845 /* Load address into scratch register. */
3846 off_r = GEN_INT (off);
3847 if (satisfies_constraint_I (off_r))
3848 emit_insn (gen_adddi3 (dest, src, off_r));
3851 emit_move_insn (dest, off_r);
3852 emit_insn (gen_adddi3 (dest, src, dest));
3855 src = gen_rtx_MEM (Pmode, dest);
3859 src = gen_rtx_REG (DImode, BR_REG (0));
3861 emit_move_insn (dest, src);
3865 ia64_hard_regno_rename_ok (int from, int to)
3867 /* Don't clobber any of the registers we reserved for the prologue. */
3870 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3871 if (to == current_frame_info.r[r]
3872 || from == current_frame_info.r[r]
3873 || to == emitted_frame_related_regs[r]
3874 || from == emitted_frame_related_regs[r])
3877 /* Don't use output registers outside the register frame. */
3878 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3881 /* Retain even/oddness on predicate register pairs. */
3882 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3883 return (from & 1) == (to & 1);
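/* E.g. a rename from p6 to p8 keeps the low bit and is allowed,
   while p6 to p7 is not; the even/odd pairing is how a predicate
   and its complement are kept associated.  */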
3888 /* Target hook for assembling integer objects. Handle word-sized
3889 aligned objects and detect the cases when @fptr is needed. */
3892 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3894 if (size == POINTER_SIZE / BITS_PER_UNIT
3895 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3896 && GET_CODE (x) == SYMBOL_REF
3897 && SYMBOL_REF_FUNCTION_P (x))
3899 static const char * const directive[2][2] = {
3900 /* 64-bit pointer */ /* 32-bit pointer */
3901 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3902 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3904 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3905 output_addr_const (asm_out_file, x);
3906 fputs (")\n", asm_out_file);
3909 return default_assemble_integer (x, size, aligned_p);
3912 /* Emit the function prologue. */
3915 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3917 int mask, grsave, grsave_prev;
3919 if (current_frame_info.need_regstk)
3920 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3921 current_frame_info.n_input_regs,
3922 current_frame_info.n_local_regs,
3923 current_frame_info.n_output_regs,
3924 current_frame_info.n_rotate_regs);
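/* Sample directive (illustrative): a frame using two input, three
   local, one output and no rotating registers yields

	.regstk 2, 3, 1, 0  */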
3926 if (ia64_except_unwind_info () != UI_TARGET)
3929 /* Emit the .prologue directive. */
3932 grsave = grsave_prev = 0;
3933 if (current_frame_info.r[reg_save_b0] != 0)
3936 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3938 if (current_frame_info.r[reg_save_ar_pfs] != 0
3939 && (grsave_prev == 0
3940 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3943 if (grsave_prev == 0)
3944 grsave = current_frame_info.r[reg_save_ar_pfs];
3945 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3947 if (current_frame_info.r[reg_fp] != 0
3948 && (grsave_prev == 0
3949 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3952 if (grsave_prev == 0)
3953 grsave = HARD_FRAME_POINTER_REGNUM;
3954 grsave_prev = current_frame_info.r[reg_fp];
3956 if (current_frame_info.r[reg_save_pr] != 0
3957 && (grsave_prev == 0
3958 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3961 if (grsave_prev == 0)
3962 grsave = current_frame_info.r[reg_save_pr];
3965 if (mask && TARGET_GNU_AS)
3966 fprintf (file, "\t.prologue %d, %d\n", mask,
3967 ia64_dbx_register_number (grsave));
3969 fputs ("\t.prologue\n", file);
3971 /* Emit a .spill directive, if necessary, to relocate the base of
3972 the register spill area. */
3973 if (current_frame_info.spill_cfa_off != -16)
3974 fprintf (file, "\t.spill %ld\n",
3975 (long) (current_frame_info.spill_cfa_off
3976 + current_frame_info.spill_size));
3979 /* Emit the .body directive at the scheduled end of the prologue. */
3982 ia64_output_function_end_prologue (FILE *file)
3984 if (ia64_except_unwind_info () != UI_TARGET)
3987 fputs ("\t.body\n", file);
3990 /* Emit the function epilogue. */
3993 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3994 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3998 if (current_frame_info.r[reg_fp])
4000 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4001 reg_names[HARD_FRAME_POINTER_REGNUM]
4002 = reg_names[current_frame_info.r[reg_fp]];
4003 reg_names[current_frame_info.r[reg_fp]] = tmp;
4004 reg_emitted (reg_fp);
4006 if (! TARGET_REG_NAMES)
4008 for (i = 0; i < current_frame_info.n_input_regs; i++)
4009 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4010 for (i = 0; i < current_frame_info.n_local_regs; i++)
4011 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4012 for (i = 0; i < current_frame_info.n_output_regs; i++)
4013 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4016 current_frame_info.initialized = 0;
4020 ia64_dbx_register_number (int regno)
4022 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4023 from its home at loc79 to something inside the register frame. We
4024 must perform the same renumbering here for the debug info. */
4025 if (current_frame_info.r[reg_fp])
4027 if (regno == HARD_FRAME_POINTER_REGNUM)
4028 regno = current_frame_info.r[reg_fp];
4029 else if (regno == current_frame_info.r[reg_fp])
4030 regno = HARD_FRAME_POINTER_REGNUM;
4033 if (IN_REGNO_P (regno))
4034 return 32 + regno - IN_REG (0);
4035 else if (LOC_REGNO_P (regno))
4036 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4037 else if (OUT_REGNO_P (regno))
4038 return (32 + current_frame_info.n_input_regs
4039 + current_frame_info.n_local_regs + regno - OUT_REG (0));
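/* Worked example (illustrative): with n_input_regs == 2 and
   n_local_regs == 3, the mapping is in0->32, in1->33, loc0->34,
   loc2->36, out0->37, so the debug register numbers are dense even
   though the hard regnos of in0/loc0/out0 are not adjacent.  */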
4044 /* Implement TARGET_TRAMPOLINE_INIT.
4046 The trampoline should set the static chain pointer to value placed
4047 into the trampoline and should branch to the specified routine.
4048 To make the normal indirect-subroutine calling convention work,
4049 the trampoline must look like a function descriptor; the first
4050 word being the target address and the second being the target's
4051 global pointer.
4053 We abuse the concept of a global pointer by arranging for it
4054 to point to the data we need to load. The complete trampoline
4055 has the following form:
4057 +-------------------+ \
4058 TRAMP: | __ia64_trampoline | |
4059 +-------------------+  > fake function descriptor
4060 | TRAMP+16          | |
4061 +-------------------+ /
4062 | target descriptor |
4063 +-------------------+
4064 | static link       |
4065 +-------------------+
4069 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4071 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4072 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4074 /* The Intel assembler requires that the global __ia64_trampoline symbol
4075      be declared explicitly.  */
4078 static bool declared_ia64_trampoline = false;
4080 if (!declared_ia64_trampoline)
4082 declared_ia64_trampoline = true;
4083 (*targetm.asm_out.globalize_label) (asm_out_file,
4084 "__ia64_trampoline");
4088 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4089 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4090 fnaddr = convert_memory_address (Pmode, fnaddr);
4091 static_chain = convert_memory_address (Pmode, static_chain);
4093 /* Load up our iterator. */
4094 addr_reg = copy_to_reg (addr);
4095 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4097 /* The first two words are the fake descriptor:
4098 __ia64_trampoline, ADDR+16. */
4099 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4100 if (TARGET_ABI_OPEN_VMS)
4102 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4103 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4104 relocation against function symbols to make it identical to the
4105 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4106 strict ELF and dereference to get the bare code address. */
4107 rtx reg = gen_reg_rtx (Pmode);
4108 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4109 emit_move_insn (reg, tramp);
4110 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4113 emit_move_insn (m_tramp, tramp);
4114 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4115 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4117 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4118 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4119 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4121 /* The third word is the target descriptor. */
4122 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4123 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4124 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4126 /* The fourth word is the static chain. */
4127 emit_move_insn (m_tramp, static_chain);
4130 /* Do any needed setup for a variadic function. CUM has not been updated
4131 for the last named argument which has type TYPE and mode MODE.
4133 We generate the actual spill instructions during prologue generation. */
4136 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4137 tree type, int * pretend_size,
4138 int second_time ATTRIBUTE_UNUSED)
4140 CUMULATIVE_ARGS next_cum = *cum;
4142 /* Skip the current argument. */
4143 ia64_function_arg_advance (&next_cum, mode, type, 1);
4145 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4147 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4148 *pretend_size = n * UNITS_PER_WORD;
4149 cfun->machine->n_varargs = n;
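/* Editorial sketch (hypothetical helper, not in the original source) of
   the computation above in standalone form.  On ia64, MAX_ARGUMENT_SLOTS
   is 8 and UNITS_PER_WORD is 8, so a function whose named arguments
   consume 3 slots reserves 5 * 8 == 40 bytes of pretend arguments for
   the anonymous register slots.  */

static inline int
ia64_example_pretend_bytes (int named_slots)
{
  const int max_slots = 8;            /* MAX_ARGUMENT_SLOTS */
  const int word_bytes = 8;           /* UNITS_PER_WORD */
  int n = max_slots - named_slots;    /* anonymous slots remaining */
  return n > 0 ? n * word_bytes : 0;
}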
4153 /* Check whether TYPE is a homogeneous floating point aggregate. If
4154 it is, return the mode of the floating point type that appears
4155 in all leafs. If it is not, return VOIDmode.
4157    An aggregate is a homogeneous floating point aggregate if all
4158    fields/elements in it have the same floating point type (e.g.,
4159 SFmode). 128-bit quad-precision floats are excluded.
4161 Variable sized aggregates should never arrive here, since we should
4162 have already decided to pass them by reference. Top-level zero-sized
4163 aggregates are excluded because our parallels crash the middle-end. */
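/* Editorial examples (assumptions, not from the original source) of the
   classification implemented below.  The first two types are HFAs, since
   every leaf has the same floating point mode; the third mixes leaf
   modes and gets VOIDmode; a 128-bit quad-precision (TFmode) leaf is
   excluded outright.  */

struct hfa_example_df { double x, y; };         /* HFA; element mode DFmode */
struct hfa_example_sf { float v[4]; };          /* HFA; element mode SFmode */
struct hfa_example_no { float x; double y; };   /* not an HFA -> VOIDmode   */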
4165 static enum machine_mode
4166 hfa_element_mode (const_tree type, bool nested)
4168 enum machine_mode element_mode = VOIDmode;
4169 enum machine_mode mode;
4170 enum tree_code code = TREE_CODE (type);
4171 int know_element_mode = 0;
4174 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4179 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4180 case BOOLEAN_TYPE: case POINTER_TYPE:
4181 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4182 case LANG_TYPE: case FUNCTION_TYPE:
4185 /* Fortran complex types are supposed to be HFAs, so we need to handle
4186 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4187 	 types.  */
4189 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4190 && TYPE_MODE (type) != TCmode)
4191 return GET_MODE_INNER (TYPE_MODE (type));
4196 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4197 mode if this is contained within an aggregate. */
4198 if (nested && TYPE_MODE (type) != TFmode)
4199 return TYPE_MODE (type);
4204 return hfa_element_mode (TREE_TYPE (type), 1);
4208 case QUAL_UNION_TYPE:
4209 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4211 if (TREE_CODE (t) != FIELD_DECL)
4214 mode = hfa_element_mode (TREE_TYPE (t), 1);
4215 if (know_element_mode)
4217 if (mode != element_mode)
4220 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4224 know_element_mode = 1;
4225 element_mode = mode;
4228 return element_mode;
4231 /* If we reach here, we probably have some front-end specific type
4232 that the backend doesn't know about. This can happen via the
4233 aggregate_value_p call in init_function_start. All we can do is
4234 ignore unknown tree types. */
4241 /* Return the number of words required to hold a quantity of TYPE and MODE
4242 when passed as an argument. */
4244 ia64_function_arg_words (tree type, enum machine_mode mode)
4248 if (mode == BLKmode)
4249 words = int_size_in_bytes (type);
4251 words = GET_MODE_SIZE (mode);
4253 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
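/* Editorial sketch (hypothetical helper, not in the original source) of
   the rounding rule above: with 8-byte words, a 12-byte aggregate takes
   (12 + 7) / 8 == 2 argument slots.  */

static inline int
ia64_example_size_in_words (int byte_size, int word_bytes)
{
  return (byte_size + word_bytes - 1) / word_bytes;   /* round up */
}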
4256 /* Return the number of registers that should be skipped so the current
4257 argument (described by TYPE and WORDS) will be properly aligned.
4259 Integer and float arguments larger than 8 bytes start at the next
4260 even boundary. Aggregates larger than 8 bytes start at the next
4261 even boundary if the aggregate has 16 byte alignment. Note that
4262 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4263    but must still be aligned in registers.
4265 ??? The ABI does not specify how to handle aggregates with
4266 alignment from 9 to 15 bytes, or greater than 16. We handle them
4267 all as if they had 16 byte alignment. Such aggregates can occur
4268 only if gcc extensions are used. */
4270 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4272 /* No registers are skipped on VMS. */
4273 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4277 && TREE_CODE (type) != INTEGER_TYPE
4278 && TREE_CODE (type) != REAL_TYPE)
4279 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
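/* Editorial sketch (hypothetical helper, not from the original source)
   of the rule above: one slot is skipped only when we stand at an odd
   slot and the argument needs even-slot (16-byte) alignment, e.g. an
   aggregate with TYPE_ALIGN greater than 64 bits arriving at slot 1
   starts at slot 2 instead.  */

static inline int
ia64_example_arg_offset (int cum_words, int align_bits)
{
  return ((cum_words & 1) && align_bits > 64) ? 1 : 0;
}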
4284 /* Return rtx for register where argument is passed, or zero if it is passed
4285    on the stack.  */
4286 /* ??? 128-bit quad-precision floats are always passed in general
4287    registers.  */
4290 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4291 int named, int incoming)
4293 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4294 int words = ia64_function_arg_words (type, mode);
4295 int offset = ia64_function_arg_offset (cum, type, words);
4296 enum machine_mode hfa_mode = VOIDmode;
4298 /* For OPEN VMS, emit the instruction setting up the argument register here,
4299 when we know this will be together with the other arguments setup related
4300 insns. This is not the conceptually best place to do this, but this is
4301 the easiest as we have convenient access to cumulative args info. */
4303 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4306 unsigned HOST_WIDE_INT regval = cum->words;
4309 for (i = 0; i < 8; i++)
4310 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4312 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4316 /* If all argument slots are used, then it must go on the stack. */
4317 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4320 /* Check for and handle homogeneous FP aggregates. */
4322 hfa_mode = hfa_element_mode (type, 0);
4324 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4325 and unprototyped hfas are passed specially. */
4326 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4330 int fp_regs = cum->fp_regs;
4331 int int_regs = cum->words + offset;
4332 int hfa_size = GET_MODE_SIZE (hfa_mode);
4336 /* If prototyped, pass it in FR regs then GR regs.
4337 If not prototyped, pass it in both FR and GR regs.
4339 If this is an SFmode aggregate, then it is possible to run out of
4340 FR regs while GR regs are still left. In that case, we pass the
4341 remaining part in the GR regs. */
4343 /* Fill the FP regs. We do this always. We stop if we reach the end
4344 of the argument, the last FP register, or the last argument slot. */
4346 byte_size = ((mode == BLKmode)
4347 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4348 args_byte_size = int_regs * UNITS_PER_WORD;
4350 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4351 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4353 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4354 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4358 args_byte_size += hfa_size;
4362 /* If no prototype, then the whole thing must go in GR regs. */
4363 if (! cum->prototype)
4365 /* If this is an SFmode aggregate, then we might have some left over
4366 that needs to go in GR regs. */
4367 else if (byte_size != offset)
4368 int_regs += offset / UNITS_PER_WORD;
4370 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4372 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4374 enum machine_mode gr_mode = DImode;
4375 unsigned int gr_size;
4377 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4378 then this goes in a GR reg left adjusted/little endian, right
4379 adjusted/big endian. */
4380 /* ??? Currently this is handled wrong, because 4-byte hunks are
4381 always right adjusted/little endian. */
4384 /* If we have an even 4 byte hunk because the aggregate is a
4385 multiple of 4 bytes in size, then this goes in a GR reg right
4386 adjusted/little endian. */
4387 else if (byte_size - offset == 4)
4390 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4391 gen_rtx_REG (gr_mode, (basereg
4395 gr_size = GET_MODE_SIZE (gr_mode);
4397 if (gr_size == UNITS_PER_WORD
4398 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4400 else if (gr_size > UNITS_PER_WORD)
4401 int_regs += gr_size / UNITS_PER_WORD;
4403 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4406   /* On OpenVMS, a variable argument is passed in either Rn or Fn.  */
4407 else if (TARGET_ABI_OPEN_VMS && named == 0)
4409 if (FLOAT_MODE_P (mode))
4410 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4412 return gen_rtx_REG (mode, basereg + cum->words);
4415   /* Integral values and aggregates go in general registers.  If we have run
4416      out of FR registers, then FP values must also go in general registers.
4417      This can happen when we have an SFmode HFA.  */
4418 else if (mode == TFmode || mode == TCmode
4419 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4421 int byte_size = ((mode == BLKmode)
4422 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4423 if (BYTES_BIG_ENDIAN
4424 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4425 && byte_size < UNITS_PER_WORD
4428 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4429 gen_rtx_REG (DImode,
4430 (basereg + cum->words
4433 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4436 return gen_rtx_REG (mode, basereg + cum->words + offset);
4440   /* If there is a prototype, then FP values go in an FR register when
4441      named, and in a GR register when unnamed.  */
4442 else if (cum->prototype)
4445 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4446 /* In big-endian mode, an anonymous SFmode value must be represented
4447 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4448 the value into the high half of the general register. */
4449 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4450 return gen_rtx_PARALLEL (mode,
4452 gen_rtx_EXPR_LIST (VOIDmode,
4453 gen_rtx_REG (DImode, basereg + cum->words + offset),
4456 return gen_rtx_REG (mode, basereg + cum->words + offset);
4458   /* If there is no prototype, then FP values go in both FR and GR
4459      registers.  */
4462 /* See comment above. */
4463 enum machine_mode inner_mode =
4464 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4466 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4467 gen_rtx_REG (mode, (FR_ARG_FIRST
4470 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4471 gen_rtx_REG (inner_mode,
4472 (basereg + cum->words
4476 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4480 /* Return number of bytes, at the beginning of the argument, that must be
4481    put in registers.  0 if the argument is entirely in registers or entirely
4482    on the stack.  */
4485 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4486 tree type, bool named ATTRIBUTE_UNUSED)
4488 int words = ia64_function_arg_words (type, mode);
4489 int offset = ia64_function_arg_offset (cum, type, words);
4491 /* If all argument slots are used, then it must go on the stack. */
4492 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4495 /* It doesn't matter whether the argument goes in FR or GR regs. If
4496 it fits within the 8 argument slots, then it goes entirely in
4497 registers. If it extends past the last argument slot, then the rest
4498 goes on the stack. */
4500 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4503 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
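/* Editorial sketch (hypothetical helper, not in the original source)
   mirroring the logic above: with 8 slots of 8 bytes each, a 4-word
   argument starting at slot 6 has 2 words in registers and the rest on
   the stack, so 16 bytes are partial.  */

static inline int
ia64_example_partial_bytes (int cum_words, int offset, int arg_words)
{
  const int max_slots = 8;   /* MAX_ARGUMENT_SLOTS */
  const int word_bytes = 8;  /* UNITS_PER_WORD */

  if (cum_words + offset >= max_slots)
    return 0;                                  /* entirely on the stack */
  if (arg_words + cum_words + offset <= max_slots)
    return 0;                                  /* entirely in registers */
  return (max_slots - cum_words - offset) * word_bytes;
}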
4506 /* Return ivms_arg_type based on machine_mode. */
4508 static enum ivms_arg_type
4509 ia64_arg_type (enum machine_mode mode)
4522 /* Update CUM to point after this argument. This is patterned after
4523 ia64_function_arg. */
4526 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4527 tree type, int named)
4529 int words = ia64_function_arg_words (type, mode);
4530 int offset = ia64_function_arg_offset (cum, type, words);
4531 enum machine_mode hfa_mode = VOIDmode;
4533 /* If all arg slots are already full, then there is nothing to do. */
4534 if (cum->words >= MAX_ARGUMENT_SLOTS)
4536 cum->words += words + offset;
4540 cum->atypes[cum->words] = ia64_arg_type (mode);
4541 cum->words += words + offset;
4543 /* Check for and handle homogeneous FP aggregates. */
4545 hfa_mode = hfa_element_mode (type, 0);
4547 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4548 and unprototyped hfas are passed specially. */
4549 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4551 int fp_regs = cum->fp_regs;
4552 /* This is the original value of cum->words + offset. */
4553 int int_regs = cum->words - words;
4554 int hfa_size = GET_MODE_SIZE (hfa_mode);
4558 /* If prototyped, pass it in FR regs then GR regs.
4559 If not prototyped, pass it in both FR and GR regs.
4561 If this is an SFmode aggregate, then it is possible to run out of
4562 FR regs while GR regs are still left. In that case, we pass the
4563 remaining part in the GR regs. */
4565 /* Fill the FP regs. We do this always. We stop if we reach the end
4566 of the argument, the last FP register, or the last argument slot. */
4568 byte_size = ((mode == BLKmode)
4569 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4570 args_byte_size = int_regs * UNITS_PER_WORD;
4572 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4573 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4576 args_byte_size += hfa_size;
4580 cum->fp_regs = fp_regs;
4583   /* On OpenVMS, a variable argument is passed in either Rn or Fn.  */
4584 else if (TARGET_ABI_OPEN_VMS && named == 0)
4586 cum->int_regs = cum->words;
4587 cum->fp_regs = cum->words;
4590   /* Integral values and aggregates go in general registers.  So do TFmode FP
4591      values.  If we have run out of FR registers, then other FP values must
4592      also go in general registers.  This can happen when we have an SFmode HFA.  */
4593 else if (mode == TFmode || mode == TCmode
4594 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4595 cum->int_regs = cum->words;
4597   /* If there is a prototype, then FP values go in an FR register when
4598      named, and in a GR register when unnamed.  */
4599 else if (cum->prototype)
4602 cum->int_regs = cum->words;
4604 /* ??? Complex types should not reach here. */
4605 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4607   /* If there is no prototype, then FP values go in both FR and GR
4608      registers.  */
4611 /* ??? Complex types should not reach here. */
4612 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4613 cum->int_regs = cum->words;
4617 /* Arguments with alignment larger than 8 bytes start at the next even
4618    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4619 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4622 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4625 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4626 return PARM_BOUNDARY * 2;
4630 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4631 return PARM_BOUNDARY * 2;
4633 return PARM_BOUNDARY;
4636 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4637 return PARM_BOUNDARY * 2;
4639 return PARM_BOUNDARY;
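/* For illustration (editorial note, not in the original source):
   PARM_BOUNDARY is 64 on ia64, so the function above returns 128 for a
   16-byte aligned aggregate or for TFmode under ILP32 HP-UX, and 64 for
   an ordinary 8-byte scalar.  */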
4642 /* True if it is OK to do sibling call optimization for the specified
4643 call expression EXP. DECL will be the called function, or NULL if
4644 this is an indirect call. */
4646 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4648   /* We can't perform a sibcall if the current function has the syscall_linkage
4649      attribute.  */
4650 if (lookup_attribute ("syscall_linkage",
4651 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4654 /* We must always return with our current GP. This means we can
4655 only sibcall to functions defined in the current module unless
4656 TARGET_CONST_GP is set to true. */
4657 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4661 /* Implement va_arg. */
4664 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4667 /* Variable sized types are passed by reference. */
4668 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4670 tree ptrtype = build_pointer_type (type);
4671 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4672 return build_va_arg_indirect_ref (addr);
4675 /* Aggregate arguments with alignment larger than 8 bytes start at
4676 the next even boundary. Integer and floating point arguments
4677 do so if they are larger than 8 bytes, whether or not they are
4678 also aligned larger than 8 bytes. */
4679 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4680 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4682 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4683 size_int (2 * UNITS_PER_WORD - 1));
4684 t = fold_convert (sizetype, t);
4685 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4686 size_int (-2 * UNITS_PER_WORD));
4687 t = fold_convert (TREE_TYPE (valist), t);
4688 gimplify_assign (unshare_expr (valist), t, pre_p);
4691 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
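/* Editorial sketch (hypothetical helper, not from the original source)
   of the gimple built above, as plain pointer arithmetic: round the
   va_list pointer up to the next 2 * UNITS_PER_WORD (16-byte) boundary
   before fetching such an argument.  */

static inline unsigned long
ia64_example_align_valist (unsigned long p)
{
  return (p + 2 * 8 - 1) & -(unsigned long) (2 * 8);
}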
4694 /* Return 1 if the function return value is returned in memory.  Return 0 if
4695    it is in a register.  */
4698 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4700 enum machine_mode mode;
4701 enum machine_mode hfa_mode;
4702 HOST_WIDE_INT byte_size;
4704 mode = TYPE_MODE (valtype);
4705 byte_size = GET_MODE_SIZE (mode);
4706 if (mode == BLKmode)
4708 byte_size = int_size_in_bytes (valtype);
4713   /* HFAs with up to 8 elements are returned in the FP argument registers.  */
4715 hfa_mode = hfa_element_mode (valtype, 0);
4716 if (hfa_mode != VOIDmode)
4718 int hfa_size = GET_MODE_SIZE (hfa_mode);
4720 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4725 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4731 /* Return rtx for register that holds the function return value. */
4734 ia64_function_value (const_tree valtype,
4735 const_tree fn_decl_or_type,
4736 bool outgoing ATTRIBUTE_UNUSED)
4738 enum machine_mode mode;
4739 enum machine_mode hfa_mode;
4741 const_tree func = fn_decl_or_type;
4744 && !DECL_P (fn_decl_or_type))
4747 mode = TYPE_MODE (valtype);
4748 hfa_mode = hfa_element_mode (valtype, 0);
4750 if (hfa_mode != VOIDmode)
4758 hfa_size = GET_MODE_SIZE (hfa_mode);
4759 byte_size = ((mode == BLKmode)
4760 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4762 for (i = 0; offset < byte_size; i++)
4764 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4765 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4769 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4771 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4772 return gen_rtx_REG (mode, FR_ARG_FIRST);
4775 bool need_parallel = false;
4777 /* In big-endian mode, we need to manage the layout of aggregates
4778 in the registers so that we get the bits properly aligned in
4779 the highpart of the registers. */
4780 if (BYTES_BIG_ENDIAN
4781 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4782 need_parallel = true;
4784 /* Something like struct S { long double x; char a[0] } is not an
4785 HFA structure, and therefore doesn't go in fp registers. But
4786 the middle-end will give it XFmode anyway, and XFmode values
4787 don't normally fit in integer registers. So we need to smuggle
4788 the value inside a parallel. */
4789 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4790 need_parallel = true;
4800 bytesize = int_size_in_bytes (valtype);
4801 /* An empty PARALLEL is invalid here, but the return value
4802 doesn't matter for empty structs. */
4804 return gen_rtx_REG (mode, GR_RET_FIRST);
4805 for (i = 0; offset < bytesize; i++)
4807 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4808 gen_rtx_REG (DImode,
4811 offset += UNITS_PER_WORD;
4813 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4816 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4817 func ? TREE_TYPE (func) : NULL_TREE,
4820 return gen_rtx_REG (mode, GR_RET_FIRST);
4824 /* Worker function for TARGET_LIBCALL_VALUE. */
4827 ia64_libcall_value (enum machine_mode mode,
4828 const_rtx fun ATTRIBUTE_UNUSED)
4830 return gen_rtx_REG (mode,
4831 (((GET_MODE_CLASS (mode) == MODE_FLOAT
4832 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4833 && (mode) != TFmode)
4834 ? FR_RET_FIRST : GR_RET_FIRST));
4837 /* Worker function for FUNCTION_VALUE_REGNO_P. */
4840 ia64_function_value_regno_p (const unsigned int regno)
4842 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
4843 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
4846 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4847 We need to emit DTP-relative relocations. */
4850 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4852 gcc_assert (size == 4 || size == 8);
4854 fputs ("\tdata4.ua\t@dtprel(", file);
4856 fputs ("\tdata8.ua\t@dtprel(", file);
4857 output_addr_const (file, x);
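/* For example (editorial note, with a hypothetical symbol `foo'): for
   SIZE == 8 the directive emitted above has the form

       data8.ua	@dtprel(foo)

   which the linker resolves to foo's offset within its thread-local
   storage block.  */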
4861 /* Print a memory address as an operand to reference that memory location. */
4863 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4864 also call this from ia64_print_operand for memory addresses. */
4867 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4868 rtx address ATTRIBUTE_UNUSED)
4872 /* Print an operand to an assembler instruction.
4873 C Swap and print a comparison operator.
4874 D Print an FP comparison operator.
4875 E Print 32 - constant, for SImode shifts as extract.
4876 e Print 64 - constant, for DImode rotates.
4877 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4878 a floating point register emitted normally.
4879 G A floating point constant.
4880 I Invert a predicate register by adding 1.
4881 J Select the proper predicate register for a condition.
4882 j Select the inverse predicate register for a condition.
4883 O Append .acq for volatile load.
4884 P Postincrement of a MEM.
4885 Q Append .rel for volatile store.
4886 R Print .s .d or nothing for a single, double or no truncation.
4887 S Shift amount for shladd instruction.
4888 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4889 for Intel assembler.
4890 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4891 for Intel assembler.
4892 X A pair of floating point registers.
4893    r	Print register name, or constant 0 as r0.  HP compatibility for
4894 	Linux kernel.
4895    v	Print vector constant value as an 8-byte integer value.  */
4898 ia64_print_operand (FILE * file, rtx x, int code)
4905 /* Handled below. */
4910 enum rtx_code c = swap_condition (GET_CODE (x));
4911 fputs (GET_RTX_NAME (c), file);
4916 switch (GET_CODE (x))
4940 str = GET_RTX_NAME (GET_CODE (x));
4947 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4951 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4955 if (x == CONST0_RTX (GET_MODE (x)))
4956 str = reg_names [FR_REG (0)];
4957 else if (x == CONST1_RTX (GET_MODE (x)))
4958 str = reg_names [FR_REG (1)];
4961 gcc_assert (GET_CODE (x) == REG);
4962 str = reg_names [REGNO (x)];
4971 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4972 real_to_target (val, &rv, GET_MODE (x));
4973 if (GET_MODE (x) == SFmode)
4974 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4975 else if (GET_MODE (x) == DFmode)
4976 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4978 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4981 output_operand_lossage ("invalid %%G mode");
4986 fputs (reg_names [REGNO (x) + 1], file);
4992 unsigned int regno = REGNO (XEXP (x, 0));
4993 if (GET_CODE (x) == EQ)
4997 fputs (reg_names [regno], file);
5002 if (MEM_VOLATILE_P (x))
5003 fputs(".acq", file);
5008 HOST_WIDE_INT value;
5010 switch (GET_CODE (XEXP (x, 0)))
5016 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5017 if (GET_CODE (x) == CONST_INT)
5021 gcc_assert (GET_CODE (x) == REG);
5022 fprintf (file, ", %s", reg_names[REGNO (x)]);
5028 value = GET_MODE_SIZE (GET_MODE (x));
5032 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5036 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5041 if (MEM_VOLATILE_P (x))
5042 fputs(".rel", file);
5046 if (x == CONST0_RTX (GET_MODE (x)))
5048 else if (x == CONST1_RTX (GET_MODE (x)))
5050 else if (x == CONST2_RTX (GET_MODE (x)))
5053 output_operand_lossage ("invalid %%R value");
5057 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5061 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5063 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5069 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5071 const char *prefix = "0x";
5072 if (INTVAL (x) & 0x80000000)
5074 fprintf (file, "0xffffffff");
5077 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5084 unsigned int regno = REGNO (x);
5085 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5090 /* If this operand is the constant zero, write it as register zero.
5091 Any register, zero, or CONST_INT value is OK here. */
5092 if (GET_CODE (x) == REG)
5093 fputs (reg_names[REGNO (x)], file);
5094 else if (x == CONST0_RTX (GET_MODE (x)))
5096 else if (GET_CODE (x) == CONST_INT)
5097 output_addr_const (file, x);
5099 output_operand_lossage ("invalid %%r value");
5103 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5104 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5111 /* For conditional branches, returns or calls, substitute
5112 sptk, dptk, dpnt, or spnt for %s. */
5113 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5116 int pred_val = INTVAL (XEXP (x, 0));
5118 /* Guess top and bottom 10% statically predicted. */
5119 if (pred_val < REG_BR_PROB_BASE / 50
5120 && br_prob_note_reliable_p (x))
5122 else if (pred_val < REG_BR_PROB_BASE / 2)
5124 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5125 || !br_prob_note_reliable_p (x))
5130 else if (GET_CODE (current_output_insn) == CALL_INSN)
5135 fputs (which, file);
5140 x = current_insn_predicate;
5143 unsigned int regno = REGNO (XEXP (x, 0));
5144 if (GET_CODE (x) == EQ)
5146 fprintf (file, "(%s) ", reg_names [regno]);
5151 output_operand_lossage ("ia64_print_operand: unknown code");
5155 switch (GET_CODE (x))
5157 /* This happens for the spill/restore instructions. */
5162 /* ... fall through ... */
5165 fputs (reg_names [REGNO (x)], file);
5170 rtx addr = XEXP (x, 0);
5171 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5172 addr = XEXP (addr, 0);
5173 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5178 output_addr_const (file, x);
5185 /* Compute a (partial) cost for rtx X. Return true if the complete
5186 cost has been computed, and false if subexpressions should be
5187 scanned. In either case, *TOTAL contains the cost result. */
5188 /* ??? This is incomplete. */
5191 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5192 bool speed ATTRIBUTE_UNUSED)
5200 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5203 if (satisfies_constraint_I (x))
5205 else if (satisfies_constraint_J (x))
5208 *total = COSTS_N_INSNS (1);
5211 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5214 *total = COSTS_N_INSNS (1);
5219 *total = COSTS_N_INSNS (1);
5225 *total = COSTS_N_INSNS (3);
5229 /* For multiplies wider than HImode, we have to go to the FPU,
5230 which normally involves copies. Plus there's the latency
5231 of the multiply itself, and the latency of the instructions to
5232 transfer integer regs to FP regs. */
5233 /* ??? Check for FP mode. */
5234 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5235 *total = COSTS_N_INSNS (10);
5237 *total = COSTS_N_INSNS (2);
5245 *total = COSTS_N_INSNS (1);
5252 /* We make divide expensive, so that divide-by-constant will be
5253 optimized to a multiply. */
5254 *total = COSTS_N_INSNS (60);
5262 /* Calculate the cost of moving data from a register in class FROM to
5263 one in class TO, using MODE. */
5266 ia64_register_move_cost (enum machine_mode mode, reg_class_t from_i,
5269 enum reg_class from = (enum reg_class) from_i;
5270 enum reg_class to = (enum reg_class) to_i;
5272 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5273 if (to == ADDL_REGS)
5275 if (from == ADDL_REGS)
5278 /* All costs are symmetric, so reduce cases by putting the
5279      lower-numbered class as the destination.  */
5282 enum reg_class tmp = to;
5283 to = from, from = tmp;
5286 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5287 so that we get secondary memory reloads. Between FR_REGS,
5288 we have to make this at least as expensive as memory_move_cost
5289 to avoid spectacularly poor register class preferencing. */
5290 if (mode == XFmode || mode == RFmode)
5292 if (to != GR_REGS || from != GR_REGS)
5293 return memory_move_cost (mode, to, false);
5301 /* Moving between PR registers takes two insns. */
5302 if (from == PR_REGS)
5304 /* Moving between PR and anything but GR is impossible. */
5305 if (from != GR_REGS)
5306 return memory_move_cost (mode, to, false);
5310 /* Moving between BR and anything but GR is impossible. */
5311 if (from != GR_REGS && from != GR_AND_BR_REGS)
5312 return memory_move_cost (mode, to, false);
5317 /* Moving between AR and anything but GR is impossible. */
5318 if (from != GR_REGS)
5319 return memory_move_cost (mode, to, false);
5325 case GR_AND_FR_REGS:
5326 case GR_AND_BR_REGS:
5337 /* Calculate the cost of moving data of MODE from a register to or from
5338    memory.  */
5341 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5343 bool in ATTRIBUTE_UNUSED)
5345 if (rclass == GENERAL_REGS
5346 || rclass == FR_REGS
5347 || rclass == FP_REGS
5348 || rclass == GR_AND_FR_REGS)
5354 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5355 on RCLASS to use when copying X into that class. */
5358 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5364 /* Don't allow volatile mem reloads into floating point registers.
5365 This is defined to force reload to choose the r/m case instead
5366 of the f/f case when reloading (set (reg fX) (mem/v)). */
5367 if (MEM_P (x) && MEM_VOLATILE_P (x))
5370 /* Force all unrecognized constants into the constant pool. */
5388 /* This function returns the register class required for a secondary
5389 register when copying between one of the registers in RCLASS, and X,
5390    using MODE.  A return value of NO_REGS means that no secondary register
5391    is required.  */
5394 ia64_secondary_reload_class (enum reg_class rclass,
5395 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5399 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5400 regno = true_regnum (x);
5407 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5408 interaction. We end up with two pseudos with overlapping lifetimes
5409 both of which are equiv to the same constant, and both which need
5410 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5411 changes depending on the path length, which means the qty_first_reg
5412 check in make_regs_eqv can give different answers at different times.
5413    At some point I'll probably need a reload_indi pattern to handle
5414    this.
5416 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5417 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5418 non-general registers for good measure. */
5419 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5422   /* This is needed if a pseudo used as a call_operand gets spilled to a
5423      stack slot.  */
5424 if (GET_CODE (x) == MEM)
5430 /* Need to go through general registers to get to other class regs. */
5431 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5434   /* This can happen when a paradoxical subreg is an operand to the
5435      muldi3 pattern.  */
5436 /* ??? This shouldn't be necessary after instruction scheduling is
5437 enabled, because paradoxical subregs are not accepted by
5438 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5439      stop the paradoxical subreg stupidity in the *_operand functions
5440      in recog.c.  */
5441 if (GET_CODE (x) == MEM
5442 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5443 || GET_MODE (x) == QImode))
5446 /* This can happen because of the ior/and/etc patterns that accept FP
5447 registers as operands. If the third operand is a constant, then it
5448 needs to be reloaded into a FP register. */
5449 if (GET_CODE (x) == CONST_INT)
5452 /* This can happen because of register elimination in a muldi3 insn.
5453 E.g. `26107 * (unsigned long)&u'. */
5454 if (GET_CODE (x) == PLUS)
5459 /* ??? This happens if we cse/gcse a BImode value across a call,
5460 and the function has a nonlocal goto. This is because global
5461 does not allocate call crossing pseudos to hard registers when
5462 crtl->has_nonlocal_goto is true. This is relatively
5463 common for C++ programs that use exceptions. To reproduce,
5464 return NO_REGS and compile libstdc++. */
5465 if (GET_CODE (x) == MEM)
5468 /* This can happen when we take a BImode subreg of a DImode value,
5469 and that DImode value winds up in some non-GR register. */
5470 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5482 /* Implement targetm.unspec_may_trap_p hook. */
5484 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5486 if (GET_CODE (x) == UNSPEC)
5488 switch (XINT (x, 1))
5494 case UNSPEC_CHKACLR:
5496 /* These unspecs are just wrappers. */
5497 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5501 return default_unspec_may_trap_p (x, flags);
5505 /* Parse the -mfixed-range= option string. */
5508 fix_range (const char *const_str)
5511 char *str, *dash, *comma;
5513   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5514 REG2 are either register names or register numbers. The effect
5515 of this option is to mark the registers in the range from REG1 to
5516 REG2 as ``fixed'' so they won't be used by the compiler. This is
5517 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5519 i = strlen (const_str);
5520 str = (char *) alloca (i + 1);
5521 memcpy (str, const_str, i + 1);
5525 dash = strchr (str, '-');
5528 warning (0, "value of -mfixed-range must have form REG1-REG2");
5533 comma = strchr (dash + 1, ',');
5537 first = decode_reg_name (str);
5540 warning (0, "unknown register name: %s", str);
5544 last = decode_reg_name (dash + 1);
5547 warning (0, "unknown register name: %s", dash + 1);
5555 warning (0, "%s-%s is an empty range", str, dash + 1);
5559 for (i = first; i <= last; ++i)
5560 fixed_regs[i] = call_used_regs[i] = 1;
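/* Usage illustration (editorial note, not in the original source):

     -mfixed-range=f32-f127          reserve the rotating FP registers
     -mfixed-range=f12-f15,f32-f127  several ranges, comma separated

   matching the REG1-REG2 form parsed above; each register in a range is
   made both fixed and call-used.  */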
5570 /* Implement TARGET_HANDLE_OPTION. */
5573 ia64_handle_option (size_t code, const char *arg, int value)
5577 case OPT_mfixed_range_:
5581 case OPT_mtls_size_:
5582 if (value != 14 && value != 22 && value != 64)
5583 error ("bad value %<%s%> for -mtls-size= switch", arg);
5590 const char *name; /* processor name or nickname. */
5591 enum processor_type processor;
5593 const processor_alias_table[] =
5595 {"itanium2", PROCESSOR_ITANIUM2},
5596 {"mckinley", PROCESSOR_ITANIUM2},
5598 int const pta_size = ARRAY_SIZE (processor_alias_table);
5601 for (i = 0; i < pta_size; i++)
5602 if (!strcmp (arg, processor_alias_table[i].name))
5604 ia64_tune = processor_alias_table[i].processor;
5608 error ("bad value %<%s%> for -mtune= switch", arg);
5617 /* Implement TARGET_OPTION_OVERRIDE. */
5620 ia64_option_override (void)
5622 if (TARGET_AUTO_PIC)
5623 target_flags |= MASK_CONST_GP;
5625   /* Numerous experiments show that IRA-based loop pressure
5626      calculation works better for RTL loop invariant motion on targets
5627      with enough (>= 32) registers.  It is an expensive optimization,
5628      so it is enabled only for peak performance.  */
5630 flag_ira_loop_pressure = 1;
5633 ia64_section_threshold = (global_options_set.x_g_switch_value
5635 : IA64_DEFAULT_GVALUE);
5637 init_machine_status = ia64_init_machine_status;
5639 if (align_functions <= 0)
5640 align_functions = 64;
5641 if (align_loops <= 0)
5643 if (TARGET_ABI_OPEN_VMS)
5646   ia64_override_options_after_change ();
5649 /* Implement targetm.override_options_after_change. */
5652 ia64_override_options_after_change (void)
5654 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5655 flag_schedule_insns_after_reload = 0;
5658 && !global_options_set.x_flag_selective_scheduling
5659 && !global_options_set.x_flag_selective_scheduling2)
5661 flag_selective_scheduling2 = 1;
5662 flag_sel_sched_pipelining = 1;
5664 if (mflag_sched_control_spec == 2)
5666 /* Control speculation is on by default for the selective scheduler,
5667 but not for the Haifa scheduler. */
5668 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5670 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5672       /* FIXME: remove this when we implement breaking autoinsns as
5673 	 a transformation.  */
5674 flag_auto_inc_dec = 0;
5678 /* Initialize the record of emitted frame related registers. */
5680 void ia64_init_expanders (void)
5682 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5685 static struct machine_function *
5686 ia64_init_machine_status (void)
5688 return ggc_alloc_cleared_machine_function ();
5691 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5692 static enum attr_type ia64_safe_type (rtx);
5694 static enum attr_itanium_class
5695 ia64_safe_itanium_class (rtx insn)
5697 if (recog_memoized (insn) >= 0)
5698 return get_attr_itanium_class (insn);
5699 else if (DEBUG_INSN_P (insn))
5700 return ITANIUM_CLASS_IGNORE;
5702 return ITANIUM_CLASS_UNKNOWN;
5705 static enum attr_type
5706 ia64_safe_type (rtx insn)
5708 if (recog_memoized (insn) >= 0)
5709 return get_attr_type (insn);
5711 return TYPE_UNKNOWN;
5714 /* The following collection of routines emits instruction group stop bits as
5715 necessary to avoid dependencies. */
5717 /* Need to track some additional registers as far as serialization is
5718 concerned so we can properly handle br.call and br.ret. We could
5719 make these registers visible to gcc, but since these registers are
5720 never explicitly used in gcc generated code, it seems wasteful to
5721    do so (plus it would make the call and return patterns needlessly
5722    complex).  */
5723 #define REG_RP (BR_REG (0))
5724 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5725 /* This is used for volatile asms which may require a stop bit immediately
5726 before and after them. */
5727 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5728 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5729 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5731 /* For each register, we keep track of how it has been written in the
5732 current instruction group.
5734 If a register is written unconditionally (no qualifying predicate),
5735 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5737 If a register is written if its qualifying predicate P is true, we
5738 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5739 may be written again by the complement of P (P^1) and when this happens,
5740 WRITE_COUNT gets set to 2.
5742 The result of this is that whenever an insn attempts to write a register
5743 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5745 If a predicate register is written by a floating-point insn, we set
5746 WRITTEN_BY_FP to true.
5748 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5749 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
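/* Editorial sketch (hypothetical helper, not in the original source) of
   the WRITE_COUNT transitions described above, ignoring the and/orcm
   bookkeeping: a first predicated write moves the count from 0 to 1, a
   first unconditional write moves it straight to 2, and any later write
   (in particular by the complement of FIRST_PRED) pins it at 2, the
   state that forces a stop bit before yet another write.  */

static inline int
ia64_example_write_count_next (int write_count, int predicated)
{
  if (write_count == 0)
    return predicated ? 1 : 2;
  return 2;
}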
5751 #if GCC_VERSION >= 4000
5752 #define RWS_FIELD_TYPE __extension__ unsigned short
5754 #define RWS_FIELD_TYPE unsigned int
5756 struct reg_write_state
5758 RWS_FIELD_TYPE write_count : 2;
5759 RWS_FIELD_TYPE first_pred : 10;
5760 RWS_FIELD_TYPE written_by_fp : 1;
5761 RWS_FIELD_TYPE written_by_and : 1;
5762 RWS_FIELD_TYPE written_by_or : 1;
5765 /* Cumulative info for the current instruction group. */
5766 struct reg_write_state rws_sum[NUM_REGS];
5767 #ifdef ENABLE_CHECKING
5768 /* Bitmap whether a register has been written in the current insn. */
5769 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5770 / HOST_BITS_PER_WIDEST_FAST_INT];
5773 rws_insn_set (int regno)
5775 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5776 SET_HARD_REG_BIT (rws_insn, regno);
5780 rws_insn_test (int regno)
5782 return TEST_HARD_REG_BIT (rws_insn, regno);
5785 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5786 unsigned char rws_insn[2];
5789 rws_insn_set (int regno)
5791 if (regno == REG_AR_CFM)
5793 else if (regno == REG_VOLATILE)
5798 rws_insn_test (int regno)
5800 if (regno == REG_AR_CFM)
5802 if (regno == REG_VOLATILE)
5808 /* Indicates whether this is the first instruction after a stop bit,
5809 in which case we don't need another stop bit. Without this,
5810 ia64_variable_issue will die when scheduling an alloc. */
5811 static int first_instruction;
5813 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5814 RTL for one instruction. */
5817 unsigned int is_write : 1; /* Is register being written? */
5818 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5819 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5820 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5821 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5822 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5825 static void rws_update (int, struct reg_flags, int);
5826 static int rws_access_regno (int, struct reg_flags, int);
5827 static int rws_access_reg (rtx, struct reg_flags, int);
5828 static void update_set_flags (rtx, struct reg_flags *);
5829 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5830 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5831 static void init_insn_group_barriers (void);
5832 static int group_barrier_needed (rtx);
5833 static int safe_group_barrier_needed (rtx);
5834 static int in_safe_group_barrier;
5836 /* Update *RWS for REGNO, which is being written by the current instruction,
5837 with predicate PRED, and associated register flags in FLAGS. */
5840 rws_update (int regno, struct reg_flags flags, int pred)
5843 rws_sum[regno].write_count++;
5845 rws_sum[regno].write_count = 2;
5846 rws_sum[regno].written_by_fp |= flags.is_fp;
5847 /* ??? Not tracking and/or across differing predicates. */
5848 rws_sum[regno].written_by_and = flags.is_and;
5849 rws_sum[regno].written_by_or = flags.is_or;
5850 rws_sum[regno].first_pred = pred;
5853 /* Handle an access to register REGNO of type FLAGS using predicate register
5854 PRED. Update rws_sum array. Return 1 if this access creates
5855 a dependency with an earlier instruction in the same group. */
5858 rws_access_regno (int regno, struct reg_flags flags, int pred)
5860 int need_barrier = 0;
5862 gcc_assert (regno < NUM_REGS);
5864 if (! PR_REGNO_P (regno))
5865 flags.is_and = flags.is_or = 0;
5871 rws_insn_set (regno);
5872 write_count = rws_sum[regno].write_count;
5874 switch (write_count)
5877 /* The register has not been written yet. */
5878 if (!in_safe_group_barrier)
5879 rws_update (regno, flags, pred);
5883 /* The register has been written via a predicate. Treat
5884 	 it like an unconditional write and do not try to check
5885 for complementary pred reg in earlier write. */
5886 if (flags.is_and && rws_sum[regno].written_by_and)
5888 else if (flags.is_or && rws_sum[regno].written_by_or)
5892 if (!in_safe_group_barrier)
5893 rws_update (regno, flags, pred);
5897 	  /* The register has been unconditionally written already.  We
5898 	     need a barrier.  */
5899 if (flags.is_and && rws_sum[regno].written_by_and)
5901 else if (flags.is_or && rws_sum[regno].written_by_or)
5905 if (!in_safe_group_barrier)
5907 rws_sum[regno].written_by_and = flags.is_and;
5908 rws_sum[regno].written_by_or = flags.is_or;
5918 if (flags.is_branch)
5920 	  /* Branches have several RAW exceptions that allow us to avoid
5921 	     barriers.  */
5923 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5924 /* RAW dependencies on branch regs are permissible as long
5925 as the writer is a non-branch instruction. Since we
5926 never generate code that uses a branch register written
5927 	       by a branch instruction, handling this case is
5928 	       easy.  */
5931 if (REGNO_REG_CLASS (regno) == PR_REGS
5932 && ! rws_sum[regno].written_by_fp)
5933 /* The predicates of a branch are available within the
5934 same insn group as long as the predicate was written by
5935 something other than a floating-point instruction. */
5939 if (flags.is_and && rws_sum[regno].written_by_and)
5941 if (flags.is_or && rws_sum[regno].written_by_or)
5944 switch (rws_sum[regno].write_count)
5947 /* The register has not been written yet. */
5951 /* The register has been written via a predicate, assume we
5952 need a barrier (don't check for complementary regs). */
5957 	      /* The register has been unconditionally written already.  We
5958 		 need a barrier.  */
5967 return need_barrier;
5971 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5973 int regno = REGNO (reg);
5974 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5977 return rws_access_regno (regno, flags, pred);
5980 int need_barrier = 0;
5982 need_barrier |= rws_access_regno (regno + n, flags, pred);
5983 return need_barrier;
5987 /* Examine X, which is a SET rtx, and update the register-access flags
5988    stored in *PFLAGS.  */
5991 update_set_flags (rtx x, struct reg_flags *pflags)
5993 rtx src = SET_SRC (x);
5995 switch (GET_CODE (src))
6001 /* There are four cases here:
6002 (1) The destination is (pc), in which case this is a branch,
6003 nothing here applies.
6004 (2) The destination is ar.lc, in which case this is a
6005 doloop_end_internal,
6006 (3) The destination is an fp register, in which case this is
6007 an fselect instruction.
6008 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6009 this is a check load.
6010 In all cases, nothing we do in this function applies. */
6014 if (COMPARISON_P (src)
6015 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6016 /* Set pflags->is_fp to 1 so that we know we're dealing
6017 with a floating point comparison when processing the
6018 destination of the SET. */
6021 /* Discover if this is a parallel comparison. We only handle
6022 and.orcm and or.andcm at present, since we must retain a
6023 strict inverse on the predicate pair. */
6024 else if (GET_CODE (src) == AND)
6026 else if (GET_CODE (src) == IOR)
6033 /* Subroutine of rtx_needs_barrier; this function determines whether the
6034    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6035    are as in rtx_needs_barrier.  */
6039 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6041 int need_barrier = 0;
6043 rtx src = SET_SRC (x);
6045 if (GET_CODE (src) == CALL)
6046 /* We don't need to worry about the result registers that
6047 get written by subroutine call. */
6048 return rtx_needs_barrier (src, flags, pred);
6049 else if (SET_DEST (x) == pc_rtx)
6051 /* X is a conditional branch. */
6052      /* ??? This seems redundant, as the caller sets this bit for
6053 	 all JUMP_INSNs.  */
6054 if (!ia64_spec_check_src_p (src))
6055 flags.is_branch = 1;
6056 return rtx_needs_barrier (src, flags, pred);
6059 if (ia64_spec_check_src_p (src))
6060 /* Avoid checking one register twice (in condition
6061 and in 'then' section) for ldc pattern. */
6063 gcc_assert (REG_P (XEXP (src, 2)));
6064 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6066 /* We process MEM below. */
6067 src = XEXP (src, 1);
6070 need_barrier |= rtx_needs_barrier (src, flags, pred);
6073 if (GET_CODE (dst) == ZERO_EXTRACT)
6075 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6076 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6078 return need_barrier;
6081 /* Handle an access to rtx X of type FLAGS using predicate register
6082 PRED. Return 1 if this access creates a dependency with an earlier
6083 instruction in the same group. */
6086 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6089 int is_complemented = 0;
6090 int need_barrier = 0;
6091 const char *format_ptr;
6092 struct reg_flags new_flags;
6100 switch (GET_CODE (x))
6103 update_set_flags (x, &new_flags);
6104 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6105 if (GET_CODE (SET_SRC (x)) != CALL)
6107 new_flags.is_write = 1;
6108 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6113 new_flags.is_write = 0;
6114 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6116 /* Avoid multiple register writes, in case this is a pattern with
6117 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6118 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6120 new_flags.is_write = 1;
6121 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6122 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6123 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6128 /* X is a predicated instruction. */
6130 cond = COND_EXEC_TEST (x);
6132 need_barrier = rtx_needs_barrier (cond, flags, 0);
6134 if (GET_CODE (cond) == EQ)
6135 is_complemented = 1;
6136 cond = XEXP (cond, 0);
6137 gcc_assert (GET_CODE (cond) == REG
6138 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6139 pred = REGNO (cond);
6140 if (is_complemented)
6143 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6144 return need_barrier;
6148 /* Clobber & use are for earlier compiler-phases only. */
6153 /* We always emit stop bits for traditional asms. We emit stop bits
6154 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6155 if (GET_CODE (x) != ASM_OPERANDS
6156 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6158 /* Avoid writing the register multiple times if we have multiple
6159 asm outputs. This avoids a failure in rws_access_reg. */
6160 if (! rws_insn_test (REG_VOLATILE))
6162 new_flags.is_write = 1;
6163 rws_access_regno (REG_VOLATILE, new_flags, pred);
6168       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6169 	 We cannot just fall through here since then we would be confused
6170 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6171 	 a traditional asm, unlike its normal usage.  */
6173 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6174 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6179 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6181 rtx pat = XVECEXP (x, 0, i);
6182 switch (GET_CODE (pat))
6185 update_set_flags (pat, &new_flags);
6186 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6192 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6203 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6205 rtx pat = XVECEXP (x, 0, i);
6206 if (GET_CODE (pat) == SET)
6208 if (GET_CODE (SET_SRC (pat)) != CALL)
6210 new_flags.is_write = 1;
6211 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6215 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6216 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6221 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6224 if (REGNO (x) == AR_UNAT_REGNUM)
6226 for (i = 0; i < 64; ++i)
6227 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6230 need_barrier = rws_access_reg (x, flags, pred);
6234 /* Find the regs used in memory address computation. */
6235 new_flags.is_write = 0;
6236 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6239 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6240 case SYMBOL_REF: case LABEL_REF: case CONST:
6243 /* Operators with side-effects. */
6244 case POST_INC: case POST_DEC:
6245 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6247 new_flags.is_write = 0;
6248 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6249 new_flags.is_write = 1;
6250 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:   case MINUS:  case MULT:     case DIV:
    case MOD:      case UDIV:   case UMOD:   case AND:      case IOR:
    case XOR:      case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
    case ROTATERT: case SMIN:   case SMAX:   case UMIN:     case UMAX:
    case NE:       case EQ:     case GE:     case GT:       case LE:
    case LT:       case GEU:    case GTU:    case LEU:      case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:    case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
         describe the elements selected.  On ia64, those integers are
         always constants.  Avoid walking the PARALLEL so that we don't
         get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
        {
        case UNSPEC_LTOFF_DTPMOD:
        case UNSPEC_LTOFF_DTPREL:
        case UNSPEC_LTOFF_TPREL:
        case UNSPEC_PRED_REL_MUTEX:
        case UNSPEC_PIC_CALL:
        case UNSPEC_FETCHADD_ACQ:
        case UNSPEC_BSP_VALUE:
        case UNSPEC_FLUSHRS:
        case UNSPEC_BUNDLE_SELECTOR:
          break;

        case UNSPEC_GR_SPILL:
        case UNSPEC_GR_RESTORE:
          {
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
                                              new_flags, pred);
            break;
          }

        case UNSPEC_FR_SPILL:
        case UNSPEC_FR_RESTORE:
        case UNSPEC_GETF_EXP:
        case UNSPEC_SETF_EXP:
        case UNSPEC_FR_SQRT_RECIP_APPROX:
        case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
        case UNSPEC_CHKACLR:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          break;

        case UNSPEC_FR_RECIP_APPROX:
        case UNSPEC_COPYSIGN:
        case UNSPEC_FR_RECIP_APPROX_RES:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          break;

        case UNSPEC_CMPXCHG_ACQ:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
        {
        case UNSPECV_ALLOC:
          /* Alloc must always be the first instruction of a group.
             We force this by always returning true.  */
          /* ??? We might get better scheduling if we explicitly check for
             input/local/output register dependencies, and modify the
             scheduler so that alloc is always reordered to the start of
             the current group.  We could then eliminate all of the
             first_instruction code.  */
          rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          rws_access_regno (REG_AR_CFM, new_flags, pred);
          return 1;

        case UNSPECV_SET_BSP:
          need_barrier = 1;
          break;

        case UNSPECV_BLOCKAGE:
        case UNSPECV_INSN_GROUP_BARRIER:
        case UNSPECV_PSAC_ALL:
        case UNSPECV_PSAC_NORMAL:
          return 0;

        default:
          gcc_unreachable ();
        }
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      /* For everything else, walk the subexpressions.  */
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          {
          case '0':     /* unused field */
          case 'i':     /* integer */
          case 'n':     /* note */
          case 'w':     /* wide integer */
          case 's':     /* pointer to string */
          case 'S':     /* optional pointer to string */
            break;

          case 'e':
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
              need_barrier = 1;
            break;

          case 'E':
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
                need_barrier = 1;
            break;

          default:
            gcc_unreachable ();
          }
      break;
    }

  return need_barrier;
}

/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */
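/* Illustrative example (not generated by this function): on ia64, a
   read-after-write of the same register must be separated by a stop
   bit, written ";;" in assembly:

        add r14 = r32, r33
        ;;                      // stop bit: starts a new insn group
        add r15 = r14, r34      // r14 is read in a later group

   Without the stop bit, the second add would read r14 in the same
   instruction group that writes it, which is architecturally
   invalid.  */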
static int
group_barrier_needed (rtx insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
        flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        /* Don't care about USE and CLOBBER "insns"---those are used to
           indicate to the optimizer that it shouldn't get rid of
           certain operations.  */
        break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
        {
          /* We play dependency tricks with the epilogue in order
             to get proper schedules.  Undo this for dv analysis.  */
        case CODE_FOR_epilogue_deallocate_stack:
        case CODE_FOR_prologue_allocate_stack:
          pat = XVECEXP (pat, 0, 0);
          break;

          /* The pattern we use for br.cloop confuses the code above.
             The second element of the vector is representative.  */
        case CODE_FOR_doloop_end_internal:
          pat = XVECEXP (pat, 0, 1);
          break;

          /* Doesn't generate code.  */
        case CODE_FOR_pred_rel_mutex:
        case CODE_FOR_prologue_use:
          return 0;

        default:
          break;
        }

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
         asm.  */
      if (! need_barrier)
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}

/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */
static void
emit_insn_group_barriers (FILE *dump)
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == NOTE
               && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == INSN
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          init_insn_group_barriers ();
          last_label = 0;
        }
      else if (NONDEBUG_INSN_P (insn))
        {
          insns_since_last_label = 1;

          if (group_barrier_needed (insn))
            {
              if (dump)
                fprintf (dump, "Emitting stop before label %d\n",
                         INSN_UID (last_label));
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);

              init_insn_group_barriers ();
              last_label = 0;
            }
        }
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        {
          rtx last = prev_active_insn (insn);

          if (! last)
            continue;
          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
        }
      else if (NONDEBUG_INSN_P (insn))
        {
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            init_insn_group_barriers ();
          else if (group_barrier_needed (insn))
            {
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
              init_insn_group_barriers ();
              group_barrier_needed (insn);
            }
        }
    }
}

/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};
/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units:  */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx last_scheduled_insn;

/* The following variable value is a pointer to a DFA state used as a
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is the DFA state after issuing the
   last insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires a stop bit to be added before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the array mentioned above.  */

static int stop_before_p = 0;

/* The following variable value is the length of the arrays `clocks'
   and `add_cycles'.  */

static int clocks_length;

/* The following variable value is the number of data speculations in
   progress.  */
static int pending_data_specs = 0;

/* Number of memory references on the current and three future
   processor cycles.  */
static char mem_ops_in_group[4];

/* Number of the current processor cycle (from the scheduler's point
   of view).  */
static int current_cycle;
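/* A sketch of the bookkeeping around mem_ops_in_group: it is a 4-entry
   circular buffer indexed by processor cycle modulo 4.  Loads are
   counted in the slot of their own cycle, stores in the slot 3 cycles
   later (see record_memory_reference below).  For example, with
   current_cycle == 5, a load increments mem_ops_in_group[5 % 4], i.e.
   slot 1, while a store increments mem_ops_in_group[(5 + 3) % 4], i.e.
   slot 0, the slot that cycle 8 will observe.  */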
static rtx ia64_single_set (rtx);
static void ia64_emit_insn_before (rtx, rtx);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack
     here.  Although they are not classical single sets, the second set
     is there just to protect the first from moving past FP-relative
     stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */
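/* For example (an illustration of the logic below, not additional
   semantics): for a store followed by a possibly aliasing load, a
   dependency weakness of MIN_DEP_WEAK means aliasing is likely, so the
   cost is raised to PARAM_SCHED_MEM_TRUE_DEP_COST to keep the load
   away from the store; a larger weakness keeps the original cost, or
   drops it to zero for FP stores when -msched-fp-mem-deps-zero-cost
   is in effect.  */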
static int
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
        /* Assume there will be no cache conflict for floating-point data.
           For integer data, L1 conflict penalty is huge (17 cycles), so we
           never assume it will not cause a conflict.  */
        return 0;
      else
        return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}

/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because
   that decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn, next, next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
        sd_iterator_def sd_it;
        dep_t dep;
        bool has_mem_op_consumer_p = false;

        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
          {
            enum attr_itanium_class c;

            if (DEP_TYPE (dep) != REG_DEP_TRUE)
              continue;

            next = DEP_CON (dep);
            c = ia64_safe_itanium_class (next);
            if ((c == ITANIUM_CLASS_ST
                 || c == ITANIUM_CLASS_STF)
                && ia64_st_address_bypass_p (insn, next))
              {
                has_mem_op_consumer_p = true;
                break;
              }
            else if ((c == ITANIUM_CLASS_LD
                      || c == ITANIUM_CLASS_FLD
                      || c == ITANIUM_CLASS_FLDP)
                     && ia64_ld_address_bypass_p (insn, next))
              {
                has_mem_op_consumer_p = true;
                break;
              }
          }

        insn->call = has_mem_op_consumer_p;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                 int sched_verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx insn;

  if (!sel_sched_p () && reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
         insn != current_sched_info->next_tail;
         insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL_RTX;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}

/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int sched_verbose ATTRIBUTE_UNUSED,
                        int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variables.  */

static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
                          int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */

static bool
is_load_p (rtx insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
     && get_attr_check_load (insn) == CHECK_LOAD_NO);
}

/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP
   global array (taking account of the 3-cycle cache reference
   postponing for stores: see the Intel Itanium 2 Reference Manual for
   Software Development and Optimization).  */
static void
record_memory_reference (rtx insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;

    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;

    default:
      break;
    }
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
                        int *pn_ready, int clock_var,
                        int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
        if (insnp < e_ready)
          {
            rtx insn = *insnp;
            enum attr_type t = ia64_safe_type (insn);
            if (t == TYPE_UNKNOWN)
              {
                if (GET_CODE (PATTERN (insn)) == ASM_INPUT
                    || asm_noperands (PATTERN (insn)) >= 0)
                  {
                    rtx lowest = ready[n_asms];
                    ready[n_asms] = insn;
                    *insnp = lowest;
                    n_asms++;
                  }
                else
                  {
                    rtx highest = ready[n_ready - 1];
                    ready[n_ready - 1] = insn;
                    *insnp = highest;
                    return 1;
                  }
              }
          }

      if (n_asms < n_ready)
        {
          /* Some normal insns to process.  Skip the asms.  */
          ready += n_asms;
          n_ready -= n_asms;
        }
      else if (n_ready > 0)
        return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
        if (safe_group_barrier_needed (*insnp))
          nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
        return 0;
      if (reorder_type == 0)
        return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
         relative order.  */
      while (insnp-- > ready + deleted)
        while (insnp >= ready + deleted)
          {
            rtx insn = *insnp;
            if (! safe_group_barrier_needed (insn))
              break;
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
            *ready = insn;
            deleted++;
          }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed
      && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
        while (insnp >= ready + moved)
          {
            rtx insn = *insnp;
            if (! is_load_p (insn))
              break;
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
            *ready = insn;
            moved++;
          }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
                    int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
                                 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
                     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
                     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
                                 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
                     int sched_verbose ATTRIBUTE_UNUSED,
                     rtx insn ATTRIBUTE_UNUSED,
                     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
        pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
        pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (GET_CODE (insn) == CALL_INSN)
        init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
{
  gcc_assert (insn && INSN_P (insn));
  return ((!reload_completed
           || !safe_group_barrier_needed (insn))
          && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
          && (!mflag_sched_mem_insns_hard_limit
              || !is_load_p (insn)
              || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
}

/* We are choosing insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static bool
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
{
  gcc_assert (insn && INSN_P (insn));
  /* Size of ALAT is 32.  Since we perform conservative data
     speculation, we keep the ALAT at most half full.  */
  return (pending_data_specs < 16
          || !(TODO_SPEC (insn) & BEGIN_DATA));
}
/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */

static int
scheduled_good_insn (rtx last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
   should not be sorted on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
                    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
              || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
           || (mflag_sched_stop_bits_after_every_cycle
               && last_clock != clock
               && last_scheduled_insn
               && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
          && (GET_CODE (last_scheduled_insn) == CALL_INSN
              || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
        fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
                 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
        {
          state_transition (curr_state, dfa_stop_insn);
          if (TARGET_EARLY_STOP_BITS)
            *sort_p = (last_scheduled_insn == NULL_RTX
                       || GET_CODE (last_scheduled_insn) != CALL_INSN);
          else
            *sort_p = 0;
          return 1;
        }

      if (last_scheduled_insn)
        {
          if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
            state_reset (curr_state);
          else
            {
              memcpy (curr_state, prev_cycle_state, dfa_state_size);
              state_transition (curr_state, dfa_stop_insn);
              state_transition (curr_state, dfa_pre_cycle_insn);
              state_transition (curr_state, NULL);
            }
        }
    }
  return 0;
}
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */

static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;

      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
      clocks_length = new_clocks_length;
    }
}
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;

/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL_RTX;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (((ia64_sched_context_t) _sc)->prev_cycle_state);
  free (_sc);
}
typedef rtx (* gen_func_t) (rtx, rtx);

/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */

static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_movbi,
    gen_movqi_internal,
    gen_movhi_internal,
    gen_movsi_internal,
    gen_movdi_internal,
    gen_movsf_internal,
    gen_movdf_internal,
    gen_movxf_internal,
    gen_movti_internal,
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
        gen_ld = gen_ld_sa;
      else
        gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
          || ia64_needs_block_p (ts))
        gen_ld = gen_ld_s;
      else
        gen_ld = gen_ld_s_a;
    }
  else if (ts == 0)
    gen_ld = gen_ld_;
  else
    gcc_unreachable ();

  return gen_ld[mode_no];
}
/* Constants that help mapping 'enum machine_mode' to int.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8,

    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };
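/* A worked example of the mapping (with SPEC_MODE_LAST == 8 as
   reconstructed above): a QImode load has index 1; if its result feeds
   a ZERO_EXTEND, adding SPEC_GEN_EXTEND_OFFSET == 8 - 1 + 1 == 8
   yields index 9, which is exactly the gen_zero_extendqidi2* slot in
   the generator tables above.  */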
/* Return index of the MODE.  */
static int
ia64_mode_to_int (enum machine_mode mode)
{
  switch (mode)
    {
    case BImode: return 0; /* SPEC_MODE_FIRST  */
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case HImode: return 2;
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case DImode: return 4;
    case SFmode: return 5;
    case DFmode: return 6;
    case XFmode: return 7;
    case TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
         mentioned in itanium[12].md.  Predicate fp_register_operand also
         needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
          || (mflag_sched_ar_data_spec && reload_completed))
        {
          mask |= BEGIN_DATA;

          if (!sel_sched_p ()
              && ((mflag_sched_br_in_data_spec && !reload_completed)
                  || (mflag_sched_ar_in_data_spec && reload_completed)))
            mask |= BE_IN_DATA;
        }

      if (mflag_sched_control_spec
          && (!sel_sched_p ()
              || reload_completed))
        {
          mask |= BEGIN_CONTROL;

          if (!sel_sched_p () && mflag_sched_in_control_spec)
            mask |= BE_IN_CONTROL;
        }

      spec_info->mask = mask;

      if (mask)
        {
          *flags |= USE_DEPS_LIST | DO_SPECULATION;

          if (mask & BE_IN_SPEC)
            *flags |= NEW_BBS;

          spec_info->flags = 0;

          if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
            spec_info->flags |= PREFER_NON_DATA_SPEC;

          if (mask & CONTROL_SPEC)
            {
              if (mflag_sched_prefer_non_control_spec_insns)
                spec_info->flags |= PREFER_NON_CONTROL_SPEC;

              if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
                spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
            }

          if (sched_verbose >= 1)
            spec_info->dump = sched_dump;
          else
            spec_info->dump = 0;

          if (mflag_sched_count_spec_in_critical_path)
            spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
        }
    }
  else
    spec_info->mask = 0;
}
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
        {
          /* Do not speculate into regs like ar.lc.  */
          if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
            return -1;

          {
            rtx mem_reg = XEXP (mem, 0);

            if (!REG_P (mem_reg))
              return -1;
          }
        }

      mode_rtx = mem;
    }
  else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
           || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
           || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      /* Process chk.  */
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
          || attr_class == ITANIUM_CLASS_CHK_S_I
          || attr_class == ITANIUM_CLASS_CHK_S_F)
        mode_rtx = reg;
      else
        return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
            && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
        return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code;

    code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
        return code;

      default:
        return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}

/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}

/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise (for a normal instruction) return 0.  */
ds_t
ia64_get_insn_spec_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* If INSN is a speculative load, return a ds with the speculation types
   that will be checked.
   Otherwise (for a normal instruction) return 0.  */
ds_t
ia64_get_insn_checked_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* Return the speculative pattern for INSN: a load with speculation
   type TS and machine mode index MODE_NO (including a ZERO_EXTEND when
   MODE_NO denotes an extending load).  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
                      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
                                 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
                              ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
        res = 0;
      else
        {
          res = 1;
          *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
        }
    }
  else
    res = -1;

  return res;
}
/* Return a function that will generate a check for speculation TS with
   mode MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
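/* A rough map of the tables below (illustrative summary of the
   selection logic at the end of this function): data speculation
   (ld.a/ld.sa) is verified by chk.a, or by the cheaper ld.c reload
   form when a simple check suffices; control speculation (ld.s) is
   verified by chk.s, or again by ld.c when the ldc flags allow it.
   The _clr variants additionally invalidate the matching ALAT
   entry.  */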
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
                             bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_movbi_nc,
    gen_movqi_nc,
    gen_movhi_nc,
    gen_movsi_nc,
    gen_movdi_nc,
    gen_movsf_nc,
    gen_movdf_nc,
    gen_movxf_nc,
    gen_movti_nc,
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
         the ALAT entry will be allocated only if the NAT bit is set
         to zero.  So it is enough to use ld.c here.  */
      if (simple_check_p)
        {
          gcc_assert (mflag_sched_spec_ldc);

          if (clearing_check_p)
            gen_check = gen_ld_c_clr;
          else
            gen_check = gen_ld_c_nc;
        }
      else
        {
          if (clearing_check_p)
            gen_check = gen_chk_a_clr;
          else
            gen_check = gen_chk_a_nc;
        }
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
        /* We might want to use ld.sa -> ld.c instead of
           ld.s -> chk.s.  */
        {
          gcc_assert (!ia64_needs_block_p (ts));

          if (clearing_check_p)
            gen_check = gen_ld_c_clr;
          else
            gen_check = gen_ld_c_nc;
        }
      else
        gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}
/* Return nonzero, if speculation of type TS needs a branchy recovery
   check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}

/* Generate (or regenerate) a recovery check for INSN.  If LABEL is
   nonzero, generate a branchy recovery check; otherwise generate a
   simple check.  */
static rtx
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
                                           true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
                                   check_pat);

  return check_pat;
}
/* Return nonzero, if X is a branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero, if SRC belongs to a recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
        {
          t = XEXP (t, 0);

          if (GET_CODE (t) == UNSPEC)
            {
              int code;

              code = XINT (t, 1);

              if (code == UNSPEC_LDCCLR
                  || code == UNSPEC_LDCNC
                  || code == UNSPEC_CHKACLR
                  || code == UNSPEC_CHKANC
                  || code == UNSPEC_CHKS)
                {
                  gcc_assert (code != 0);
                  return code;
                }
            }
        }
    }
  return 0;
}
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
                   insn)  */
  int cost;     /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
                                nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps an insn number to the corresponding bundle
   state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of the next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees the given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;

/* The function returns the hash of BUNDLE_STATE.  */

static unsigned
bundle_state_hash (const void *bundle_state)
{
  const struct bundle_state *const state
    = (const struct bundle_state *) bundle_state;
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

static int
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
{
  const struct bundle_state *const state1
    = (const struct bundle_state *) bundle_state_1;
  const struct bundle_state *const state2
    = (const struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
          && memcmp (state1->dfa_state, state2->dfa_state,
                     dfa_state_size) == 0);
}
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */
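/* In other words, the test below compares the new state with the
   incumbent lexicographically on (cost, accumulated_insns_num,
   branch_deviation, middle_bundle_stops), smaller being better; this
   comment only summarizes the nested conditional that follows.  */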
static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
                   > bundle_state->accumulated_insns_num
                   || (((struct bundle_state *)
                        *entry_ptr)->accumulated_insns_num
                       == bundle_state->accumulated_insns_num
                       && (((struct bundle_state *)
                            *entry_ptr)->branch_deviation
                           > bundle_state->branch_deviation
                           || (((struct bundle_state *)
                                *entry_ptr)->branch_deviation
                               == bundle_state->branch_deviation
                               && ((struct bundle_state *)
                                   *entry_ptr)->middle_bundle_stops
                               > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
                                    (htab_del) 0);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  htab_delete (bundle_state_table);
}

/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
        free_bundle_state (curr_state);
        return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}

/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it is successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */
static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
                     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;

  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
        curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
          && curr_state->accumulated_insns_num % 3 != 0)
        {
          free_bundle_state (curr_state);
          return;
        }
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      curr_state->accumulated_insns_num++;
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
                  && asm_noperands (PATTERN (insn)) < 0);

      if (ia64_safe_type (insn) == TYPE_L)
        curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
         nops to be emitted before it.  Currently, alloc is the only such
         supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
         not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
        {
          free_bundle_state (curr_state);
          return;
        }

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
        return;
      if (!try_issue_insn (curr_state, insn))
        return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        {
          /* Finish bundle containing asm insn.  */
          curr_state->after_nops_num
            = 3 - curr_state->accumulated_insns_num % 3;
          curr_state->accumulated_insns_num
            += 3 - curr_state->accumulated_insns_num % 3;
        }
      else if (ia64_safe_type (insn) == TYPE_L)
        curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
        {
          state_t dfa_state;
          struct bundle_state *curr_state1;
          struct bundle_state *allocated_states_chain;

          curr_state1 = get_free_bundle_state ();
          dfa_state = curr_state1->dfa_state;
          allocated_states_chain = curr_state1->allocated_states_chain;
          *curr_state1 = *curr_state;
          curr_state1->dfa_state = dfa_state;
          curr_state1->allocated_states_chain = allocated_states_chain;
          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
                  dfa_state_size);
          curr_state = curr_state1;
        }
      if (!try_issue_nops (curr_state,
                           3 - curr_state->accumulated_insns_num % 3))
        return;
      curr_state->after_nops_num
        = 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
        += 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
}
/* The following function returns the position in the two-window bundle
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}

/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with
   position values of 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations also
   argue against the use of F-unit instructions unless they're really
   needed.  */
static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
        return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
        return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
        return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
        return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
        return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
        return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
        return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
        return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
        return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
        return 9;
      else
        gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
        return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
        return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
        return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
        return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
        return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
        return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
        return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
        return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
        return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
        return 9;
      else
        gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
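/* Illustrative example, relying on the bundle_name numbering
   reconstructed above: if the DFA state has the _0mfi_ unit reserved,
   get_template returns 2 and bundle_name[2] is ".mfi" -- a memory op
   in slot 0, an FP op in slot 1 and an integer op in slot 2.  */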
/* True when INSN is important for bundling.  */
static bool
important_for_bundling_p (rtx insn)
{
  return (INSN_P (insn)
          && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
          && GET_CODE (PATTERN (insn)) != USE
          && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns an insn important for insn bundling
   following INSN and before TAIL.  */

static rtx
get_next_important_insn (rtx insn, rtx tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL_RTX;
}

/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info () == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
         first or second slot.  If it is and has REG_EH_NOTE set, copy it
         to following nops, as br.call sets rp to the address of following
         bundle and therefore an EH region end must be on a bundle
         boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
        {
          do
            insn = next_active_insn (insn);
          while (GET_CODE (insn) == INSN
                 && get_attr_empty (insn) == EMPTY_YES);
          if (GET_CODE (insn) == CALL_INSN)
            note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
          else if (note)
            {
              int code;

              gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
                          || code == CODE_FOR_nop_b);
              if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
                note = NULL_RTX;
              else
                add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
            }
        }
    }
#endif
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses NDFA (non-deterministic
   finite automata) encoding of information about the templates and the
   inserted nops.  Nondeterminism of the automata permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automata states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using DFA
   (deterministic finite automata).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and implicitly about previous decisions) is
   described by a structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving back in the EBB, insert templates
   for the best alternative.  The templates are taken by querying the
   automaton state for each insn in the chosen bundle states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
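/* A small illustrative example: suppose a cycle issues an M-type and
   an I-type insn and then ends (the next insn carries the TImode mark
   of a new cycle).  One state the forward pass considers appends a nop
   so that the group ends exactly at a bundle boundary:

        { .mii
          ld8 r14 = [r32]
          add r15 = r15, 1
          nop.i 0 ;;            // stop bit at the bundle boundary
        }

   Competing states place the stop bit mid-bundle or pick another
   template; insert_bundle_state keeps the best candidate under the
   (cost, nops, branch deviation, mid-bundle stops) ordering.  The asm
   text is only an illustration.  */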
static void
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx insn, next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;

  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
            || GET_CODE (PATTERN (insn)) == USE
            || GET_CODE (PATTERN (insn)) == CLOBBER)
        && GET_MODE (insn) == TImode)
      {
        PUT_MODE (insn, VOIDmode);
        for (next_insn = NEXT_INSN (insn);
             next_insn != tail;
             next_insn = NEXT_INSN (next_insn))
          if (INSN_P (next_insn)
              && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
              && GET_CODE (PATTERN (next_insn)) != USE
              && GET_CODE (PATTERN (next_insn)) != CLOBBER
              && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
            {
              PUT_MODE (next_insn, TImode);
              break;
            }
      }
8711 /* Forward pass: generation of bundle states. */
8712 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8716 gcc_assert (INSN_P (insn)
8717 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8718 && GET_CODE (PATTERN (insn)) != USE
8719 && GET_CODE (PATTERN (insn)) != CLOBBER);
8720 type = ia64_safe_type (insn);
8721 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8723 index_to_bundle_states [insn_num] = NULL;
8724 for (curr_state = index_to_bundle_states [insn_num - 1];
8726 curr_state = next_state)
8728 pos = curr_state->accumulated_insns_num % 3;
8729 next_state = curr_state->next;
8730 /* We must fill up the current bundle in order to start a
8731 subsequent asm insn in a new bundle. Asm insn is always
8732 placed in a separate bundle. */
8734 = (next_insn != NULL_RTX
8735 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8736 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8737 /* We may fill up the current bundle if it is the cycle end
8738 without a group barrier. */
8740 = (only_bundle_end_p || next_insn == NULL_RTX
8741 || (GET_MODE (next_insn) == TImode
8742 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8743 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8745 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8747 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8749 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
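          /* Each state thus fans out into at most three successors --
             the insn issued after zero, one or two nops -- and the
             slot-restricted types (F, B, L, S) are the ones that may
             need two leading nops to reach a legal slot.  Hashing on
             the resulting automaton state merges equivalent states.  */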
        }
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
           curr_state != NULL;
           curr_state = curr_state->next)
        if (verbose >= 2 && dump)
          {
            /* This structure is taken from generated code of the
               pipeline hazard recognizer (see file insn-attrtab.c).
               Please don't forget to change the structure if a new
               automaton is added to the .md file.  */
            struct DFA_chip
            {
              unsigned short one_automaton_state;
              unsigned short oneb_automaton_state;
              unsigned short two_automaton_state;
              unsigned short twob_automaton_state;
            };

            fprintf
              (dump,
               "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
               curr_state->unique_num,
               (curr_state->originator == NULL
                ? -1 : curr_state->originator->unique_num),
               curr_state->cost,
               curr_state->before_nops_num, curr_state->after_nops_num,
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
               curr_state->middle_bundle_stops,
               ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
               INSN_UID (insn));
          }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are only looking at states with a fully filled-up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns
       placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
        && (best_state == NULL || best_state->cost > curr_state->cost
            || (best_state->cost == curr_state->cost
                && (curr_state->accumulated_insns_num
                    < best_state->accumulated_insns_num
                    || (curr_state->accumulated_insns_num
                        == best_state->accumulated_insns_num
                        && (curr_state->branch_deviation
                            < best_state->branch_deviation
                            || (curr_state->branch_deviation
                                == best_state->branch_deviation
                                && curr_state->middle_bundle_stops
                                < best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
               || asm_noperands (PATTERN (insn)) >= 0);
      insn_num++;
      if (verbose >= 2 && dump)
        {
          struct DFA_chip
          {
            unsigned short one_automaton_state;
            unsigned short oneb_automaton_state;
            unsigned short two_automaton_state;
            unsigned short twob_automaton_state;
          };

          fprintf
            (dump,
             "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
             curr_state->unique_num,
             (curr_state->originator == NULL
              ? -1 : curr_state->originator->unique_num),
             curr_state->cost,
             curr_state->before_nops_num, curr_state->after_nops_num,
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
             curr_state->middle_bundle_stops,
             ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
             INSN_UID (insn));
        }
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  A two-bundle window means that
         the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
          /* The following (negative template number) means that the
             processor did one bundle rotation.  */
          || (max_pos == 3 && template0 < 0))
        {
          /* We are at the end of the window -- find template(s) for
             its bundle(s).  */
          pos = max_pos;
          if (max_pos == 3)
            template0 = get_template (curr_state->dfa_state, 3);
          else
            {
              template1 = get_template (curr_state->dfa_state, 3);
              template0 = get_template (curr_state->dfa_state, 6);
            }
        }
      if (max_pos > 3 && template1 < 0)
        /* It may happen when we have a stop inside a bundle.  */
        {
          gcc_assert (pos <= 3);
          template1 = get_template (curr_state->dfa_state, 3);
          pos += 3;
        }
      if (!asm_p)
        /* Emit nops after the current insn.  */
        for (i = 0; i < curr_state->after_nops_num; i++)
          {
            nop = gen_nop ();
            emit_insn_after (nop, insn);
            pos--;
            gcc_assert (pos >= 0);
            if (pos % 3 == 0)
              {
                /* We are at the start of a bundle: emit the template
                   (it should be defined).  */
                gcc_assert (template0 >= 0);
                ia64_add_bundle_selector_before (template0, nop);
                /* If we have a two-bundle window, we make one bundle
                   rotation.  Otherwise template0 will be undefined
                   (a negative value).  */
                template0 = template1;
                template1 = -1;
              }
          }
      /* Move the position backward in the window.  A group barrier has
         no slot.  An asm insn takes a whole bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        pos--;
      /* A long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
        pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        {
          /* The current insn is at the bundle start: emit the
             template.  */
          gcc_assert (template0 >= 0);
          ia64_add_bundle_selector_before (template0, insn);
          b = PREV_INSN (insn);
          insn = b;
          /* See comment above in the analogous place for emitting nops
             after the insn.  */
          template0 = template1;
          template1 = -1;
        }
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
        {
          nop = gen_nop ();
          ia64_emit_insn_before (nop, insn);
          nop = PREV_INSN (insn);
          insn = nop;
          pos--;
          gcc_assert (pos >= 0);
          if (pos % 3 == 0)
            {
              /* See comment above in the analogous place for emitting nops
                 after the insn.  */
              gcc_assert (template0 >= 0);
              ia64_add_bundle_selector_before (template0, insn);
              b = PREV_INSN (insn);
              insn = b;
              template0 = template1;
              template1 = -1;
            }
        }
    }
#ifdef ENABLE_CHECKING
  {
    /* Assert right calculation of middle_bundle_stops.  */
    int num = best_state->middle_bundle_stops;
    bool start_bundle = true, end_bundle = false;

    for (insn = NEXT_INSN (prev_head_insn);
         insn && insn != tail;
         insn = NEXT_INSN (insn))
      {
        if (!INSN_P (insn))
          continue;
        if (recog_memoized (insn) == CODE_FOR_bundle_selector)
          start_bundle = true;
        else
          {
            rtx next_insn;

            for (next_insn = NEXT_INSN (insn);
                 next_insn && next_insn != tail;
                 next_insn = NEXT_INSN (next_insn))
              if (INSN_P (next_insn)
                  && (ia64_safe_itanium_class (next_insn)
                      != ITANIUM_CLASS_IGNORE
                      || recog_memoized (next_insn)
                      == CODE_FOR_bundle_selector)
                  && GET_CODE (PATTERN (next_insn)) != USE
                  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
                break;

            end_bundle = next_insn == NULL_RTX
              || next_insn == tail
              || (INSN_P (next_insn)
                  && recog_memoized (next_insn)
                  == CODE_FOR_bundle_selector);
            if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
                && !start_bundle && !end_bundle
                && next_insn
                && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
                && asm_noperands (PATTERN (next_insn)) < 0)
              num--;

            start_bundle = false;
          }
      }

    gcc_assert (num == 0);
  }
#endif

  free (index_to_bundle_states);
  finish_bundle_state_table ();

  dfa_clean_insn_cache ();
}
/* The following function is called at the end of scheduling BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
                current_sched_info->next_tail);
      if (sched_verbose && dump)
        fprintf (dump, "// finishing %d-%d\n",
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in the scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        {
          rtx last = prev_active_insn (insn);

          if (! last)
            continue;
          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
          seen_good_insn = 0;
          need_barrier_p = 0;
        }
      else if (NONDEBUG_INSN_P (insn))
        {
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            {
              init_insn_group_barriers ();
              seen_good_insn = 0;
              need_barrier_p = 0;
            }
          else if (need_barrier_p || group_barrier_needed (insn)
                   || (mflag_sched_stop_bits_after_every_cycle
                       && GET_MODE (insn) == TImode
                       && seen_good_insn))
            {
              if (TARGET_EARLY_STOP_BITS)
                {
                  rtx last;

                  for (last = insn;
                       last != current_sched_info->prev_head;
                       last = PREV_INSN (last))
                    if (INSN_P (last) && GET_MODE (last) == TImode
                        && stops_p [INSN_UID (last)])
                      break;
                  if (last == current_sched_info->prev_head)
                    last = insn;
                  last = prev_active_insn (last);
                  if (last
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
                                     last);
                  init_insn_group_barriers ();
                  for (last = NEXT_INSN (last);
                       last != insn;
                       last = NEXT_INSN (last))
                    if (INSN_P (last))
                      {
                        group_barrier_needed (last);
                        if (recog_memoized (last) >= 0
                            && important_for_bundling_p (last))
                          seen_good_insn = 1;
                      }
                }
              else
                {
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                    insn);
                  init_insn_group_barriers ();
                  seen_good_insn = 0;
                }
              group_barrier_needed (insn);
              if (recog_memoized (insn) >= 0
                  && important_for_bundling_p (insn))
                seen_good_insn = 1;
            }
          else if (recog_memoized (insn) >= 0
                   && important_for_bundling_p (insn))
            seen_good_insn = 1;
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
                            || asm_noperands (PATTERN (insn)) >= 0);
        }
    }
}
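
/* A sketch of the effect (illustrative, not from the sources): a stop
   bit is what the assembler prints as ";;", e.g.

       add r14 = r32, r33 ;;   // group boundary after this insn
       ld8 r15 = [r14]

   and gen_insn_group_barrier (GEN_INT (3)) generates the insn that is
   output as that ";;" separator.  */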
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}

/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces an address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
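
/* Illustrative only: the producer/consumer pair matched here looks like

       adds r14 = 8, r32      // PRODUCER: ALU insn computing an address
       st8 [r14] = r35        // CONSUMER: store through that address

   where r14, the SET_DEST of the producer, is mentioned in the
   consumer's address.  */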
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces an address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
                  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
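
/* Illustrative only: the analogous load/load pair is

       ld8 r14 = [r32]        // PRODUCER
       ld8 r15 = [r14]        // CONSUMER: pointer chasing

   Speculative forms (ld8.a, ld8.s, ld8.sa) arrive here wrapped in the
   UNSPECs checked above, hence the unwrapping loop.  */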
/* The following function returns TRUE if INSN produces an address for
   a load/store insn.  We place such insns into the M slot because it
   decreases their latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
        continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
        head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
         grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (df_get_live_in (bb), r))
          {
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == BB_END (bb))
              BB_END (bb) = n;
            head = n;
          }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = BB_HEAD (bb);

      while (1)
        {
          if (GET_CODE (insn) == CALL_INSN
              && GET_CODE (PATTERN (insn)) == COND_EXEC
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
            {
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
              if (BB_HEAD (bb) == insn)
                BB_HEAD (bb) = b;
              if (BB_END (bb) == insn)
                BB_END (bb) = a;
            }

          if (insn == BB_END (bb))
            break;
          insn = NEXT_INSN (insn);
        }
    }
}
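
/* For the curious: gen_pred_rel_mutex expands to an annotation the
   assembler sees as something like

       .pred.rel.mutex p6, p7

   telling it that at most one of the two predicates can be set, so
   that writes guarded by p6 and p7 need not be treated as
   conflicting.  */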
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && ia64_flag_schedule_insns2
      && dbg_cnt (ia64_sched2))
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
        {
          pos_1 = get_cpu_unit_code ("2_1");
          pos_2 = get_cpu_unit_code ("2_2");
          pos_3 = get_cpu_unit_code ("2_3");
          pos_4 = get_cpu_unit_code ("2_4");
          pos_5 = get_cpu_unit_code ("2_5");
          pos_6 = get_cpu_unit_code ("2_6");
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
        }
      else
        {
          pos_1 = get_cpu_unit_code ("1_1");
          pos_2 = get_cpu_unit_code ("1_2");
          pos_3 = get_cpu_unit_code ("1_3");
          pos_4 = get_cpu_unit_code ("1_4");
          pos_5 = get_cpu_unit_code ("1_5");
          pos_6 = get_cpu_unit_code ("1_6");
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
        }

      if (flag_selective_scheduling2
          && !maybe_skip_selective_scheduling ())
        run_selective_scheduling ();
      else
        schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
         evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info () == UI_TARGET)
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      if (insn)
        {
          /* Skip over insns that expand to nothing.  */
          while (GET_CODE (insn) == INSN
                 && get_attr_empty (insn) == EMPTY_YES)
            {
              if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
                  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
                saw_stop = 1;
              insn = prev_active_insn (insn);
            }
          if (GET_CODE (insn) == CALL_INSN)
            {
              if (! saw_stop)
                emit_insn (gen_insn_group_barrier (GEN_INT (3)));
              emit_insn (gen_break_f ());
              emit_insn (gen_insn_group_barrier (GEN_INT (3)));
            }
        }
    }

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
         value to "gp".  After returning from such a call, we need to make
         sure the function restores the original gp-value, even if the
         function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}

/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
        || regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));

      if (strcmp (section, ".sdata") == 0
          || strncmp (section, ".sdata.", 7) == 0
          || strncmp (section, ".gnu.linkonce.s.", 16) == 0
          || strcmp (section, ".sbss") == 0
          || strncmp (section, ".sbss.", 6) == 0
          || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
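
/* A sketch of the payoff (illustrative names): objects placed in
   .sdata/.sbss are reachable in one add from the global pointer, e.g.

       addl r14 = @gprel(small_var), r1    // r1 is the gp
       ;;
       ld8 r15 = [r14]

   instead of first loading the address from the linkage table via
   @ltoff.  ia64_section_threshold bounds the size of such objects.  */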
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last block of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif

/* Emit a debugging label after a call-frame-related insn.  We'd
   rather output the label right away, but we'd have to output it
   after, not before, the instruction, and the instruction has not
   been output yet.  So we emit the label after the insn, delete it to
   avoid introducing basic blocks, and mark it as preserved, such that
   it is still output, given that it is referenced in debug info.  */

static const char *
ia64_emit_deleted_label_after_insn (rtx insn)
{
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
  rtx lb = gen_label_rtx ();
  rtx label_insn = emit_label_after (lb, insn);

  LABEL_PRESERVE_P (lb) = 1;

  delete_insn (label_insn);

  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));

  return xstrdup (label);
}
/* Define the CFA after INSN with the steady-state definition.  */

static void
ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
{
  rtx fp = frame_pointer_needed
    ? hard_frame_pointer_rtx
    : stack_pointer_rtx;
  const char *label = ia64_emit_deleted_label_after_insn (insn);

  if (!frame)
    return;

  dwarf2out_def_cfa
    (label, REGNO (fp),
     ia64_initial_elimination_offset
     (REGNO (arg_pointer_rtx), REGNO (fp))
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
}

/* All we need to do here is avoid a crash in the generic dwarf2
   processing.  The real CFA definition is set up above.  */

static void
ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
                                rtx ARG_UNUSED (pattern),
                                int index)
{
  gcc_assert (index == UNSPECV_ALLOC);
}

/* The generic dwarf2 frame debug info generator does not define a
   separate region for the very end of the epilogue, so refrain from
   doing so in the IA64-specific code as well.  */

#define IA64_CHANGE_CFA_IN_EPILOGUE 0

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
        fprintf (asm_out_file, "\t.label_state %d\n",
                 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
                       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
}
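
/* Illustratively, for an epilogue in the middle of a function the
   directives pair up as

       .label_state 1
       .restore sp          // epilogue
       ...
       .body
       .copy_state 1        // start of the following block

   with the matching .copy_state emitted from ia64_asm_unwind_emit
   while need_copy_state is still set.  */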
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */

static void
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
                        bool unwind, bool frame)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  if (dest == stack_pointer_rtx)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);

          gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

          if (INTVAL (op1) < 0)
            {
              gcc_assert (!frame_pointer_needed);
              if (unwind)
                fprintf (asm_out_file,
                         "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
                         -INTVAL (op1));
              ia64_dwarf2out_def_steady_cfa (insn, frame);
            }
          else
            process_epilogue (asm_out_file, insn, unwind, frame);
        }
      else
        {
          gcc_assert (src == hard_frame_pointer_rtx);
          process_epilogue (asm_out_file, insn, unwind, frame);
        }
    }
  else if (dest == hard_frame_pointer_rtx)
    {
      gcc_assert (src == stack_pointer_rtx);
      gcc_assert (frame_pointer_needed);

      if (unwind)
        fprintf (asm_out_file, "\t.vframe r%d\n",
                 ia64_dbx_register_number (REGNO (dest)));
      ia64_dwarf2out_def_steady_cfa (insn, frame);
    }
  else
    gcc_unreachable ();
}
/* This function processes a SET pattern for REG_CFA_REGISTER.  */

static void
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  int dest_regno = REGNO (dest);
  int src_regno = REGNO (src);

  switch (src_regno)
    {
    case BR_REG (0):
      /* Saving the return address pointer.  */
      gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
      if (unwind)
        fprintf (asm_out_file, "\t.save rp, r%d\n",
                 ia64_dbx_register_number (dest_regno));
      break;

    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
        fprintf (asm_out_file, "\t.save pr, r%d\n",
                 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
        fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
        fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
/* This function processes a SET pattern for REG_CFA_OFFSET.  */

static void
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int src_regno = REGNO (src);
  const char *saveop;
  HOST_WIDE_INT off;
  rtx base;

  gcc_assert (MEM_P (dest));
  if (GET_CODE (XEXP (dest, 0)) == REG)
    {
      base = XEXP (dest, 0);
      off = 0;
    }
  else
    {
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
      base = XEXP (XEXP (dest, 0), 0);
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
    }

  if (base == hard_frame_pointer_rtx)
    {
      saveop = ".savepsp";
      off = - off;
    }
  else
    {
      gcc_assert (base == stack_pointer_rtx);
      saveop = ".savesp";
    }

  src_regno = REGNO (src);
  switch (src_regno)
    {
    case BR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_b0]);
      if (unwind)
        fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
                 saveop, off);
      break;

    case PR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_pr]);
      if (unwind)
        fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
                 saveop, off);
      break;

    case AR_LC_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
        fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
                 saveop, off);
      break;

    case AR_PFS_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
      if (unwind)
        fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
                 saveop, off);
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
        fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
                 saveop, off);
      break;

    case GR_REG (4):
    case GR_REG (5):
    case GR_REG (6):
    case GR_REG (7):
      if (unwind)
        fprintf (asm_out_file, "\t.save.g 0x%x\n",
                 1 << (src_regno - GR_REG (4)));
      break;

    case BR_REG (1):
    case BR_REG (2):
    case BR_REG (3):
    case BR_REG (4):
    case BR_REG (5):
      if (unwind)
        fprintf (asm_out_file, "\t.save.b 0x%x\n",
                 1 << (src_regno - BR_REG (1)));
      break;

    case FR_REG (2):
    case FR_REG (3):
    case FR_REG (4):
    case FR_REG (5):
      if (unwind)
        fprintf (asm_out_file, "\t.save.f 0x%x\n",
                 1 << (src_regno - FR_REG (2)));
      break;

    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
      if (unwind)
        fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                 1 << (src_regno - FR_REG (12)));
      break;

    default:
      /* ??? For some reason we mark other general registers, even those
         we can't represent in the unwind info.  Ignore them.  */
      break;
    }
}
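
/* Mask arithmetic, by way of example: spilling f18 falls in the
   FR_REG (16)..FR_REG (31) case above and emits ".save.gf 0x0, 0x40",
   since 1 << (18 - 12) == 0x40; the unwinder decodes the bit back to
   the register number.  */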
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

static void
ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
{
  bool unwind = ia64_except_unwind_info () == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
        {
          if (unwind)
            {
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state %d\n",
                       cfun->machine->state_num);
            }
          if (IA64_CHANGE_CFA_IN_EPILOGUE)
            ia64_dwarf2out_def_steady_cfa (insn, frame);
          need_copy_state = false;
        }
    }

  if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
         be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
        {
          if (unwind)
            fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
                     ia64_dbx_register_number (dest_regno));
        }
      else
        {
          /* This must be an alloc before a sibcall.  We must drop the
             old frame info.  The easiest way to drop the old frame
             info is to ensure we had a ".restore sp" directive
             followed by a new prologue.  If the procedure doesn't
             have a memory-stack frame, we'll issue a dummy ".restore
             sp" now.  */
          if (current_frame_info.total_size == 0 && !frame_pointer_needed)
            /* If we haven't done process_epilogue () yet, do it now.  */
            process_epilogue (asm_out_file, insn, unwind, frame);
          if (unwind)
            fprintf (asm_out_file, "\t.prologue\n");
        }
      return;
    }

  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
        pat = XEXP (note, 0);
        if (pat == NULL)
          pat = PATTERN (insn);
        process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
        handled_one = true;
        break;

      case REG_CFA_OFFSET:
        pat = XEXP (note, 0);
        if (pat == NULL)
          pat = PATTERN (insn);
        process_cfa_offset (asm_out_file, pat, unwind);
        handled_one = true;
        break;

      case REG_CFA_REGISTER:
        pat = XEXP (note, 0);
        if (pat == NULL)
          pat = PATTERN (insn);
        process_cfa_register (asm_out_file, pat, unwind);
        handled_one = true;
        break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
        /* Not used in the ia64 port.  */
        gcc_unreachable ();

      default:
        /* Not a frame-related note.  */
        break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
ia64_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
ia64_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
}

/* Implement TARGET_DEBUG_UNWIND_INFO.  */

static enum unwind_info_type
ia64_debug_unwind_info (void)
{
  return UI_TARGET;
}

/* Implement TARGET_EXCEPT_UNWIND_INFO.  */

static enum unwind_info_type
ia64_except_unwind_info (void)
{
  /* Honor the --enable-sjlj-exceptions configure switch.  */
#ifdef CONFIG_UNWIND_EXCEPTIONS
  if (CONFIG_UNWIND_EXCEPTIONS)
    return UI_SJLJ;
#endif

  /* For simplicity elsewhere in this file, indicate that all unwind
     info is disabled if we're not emitting unwind tables.  */
  if (!flag_exceptions && !flag_unwind_tables)
    return UI_NONE;

  return UI_TARGET;
}

enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ,
  IA64_BUILTIN_max
};
static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];

static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type, void_list_node);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   IA64_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      ftype = build_function_type_list (float128_type,
                                        float128_type,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type,
                                        float128_type,
                                        float128_type,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float128");

  /* Fwrite on VMS is non-standard.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
    }

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
                       NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
                      build_function_type (ptr_type_node, void_list_node),
                      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
                      build_function_type (void_type_node, void_list_node),
                      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if (built_in_decls [BUILT_IN_FINITE])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
          "_Isfinite");
      if (built_in_decls [BUILT_IN_FINITEF])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
          "_Isfinitef");
      if (built_in_decls [BUILT_IN_FINITEL])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
          "_Isfinitef128");
    }
}
static rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}
/* On HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
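
/* For example (assuming a 64-bit UNITS_PER_WORD): a 3-byte struct
   passed on the HP-UX stack is padded upward, i.e. it occupies the
   most significant 3 bytes of its 8-byte slot rather than the least
   significant ones as on most little-endian ABIs.  */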
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
         visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
                             && maybe_assemble_visibility (decl));

#ifdef DO_CRTL_NAMES
      DO_CRTL_NAMES;
#endif

      /* GNU as does not need anything here, but the HP linker does
         need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
          && TREE_CODE (decl) == FUNCTION_DECL)
        (*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
        (*targetm.asm_out.globalize_label) (file, name);
    }
}
/* Set SImode div/mod functions, since init_integral_libfuncs only
   initializes modes of word_mode and larger.  Rename the TFmode libfuncs
   using the HPUX conventions.  __divtf3 is used for XFmode.  We need to
   keep it for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */
  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions on VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     want.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

static void
ia64_soft_fp_init_libfuncs (void)
{
}

static bool
ia64_vms_valid_pointer_mode (enum machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
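
/* Roughly: in both the mask and varasm's reloc value, 1 stands for
   relocations against local symbols and 2 for relocations against
   global ones.  Returning 2 therefore lets local-reloc data stay in
   read-only sections, while 3 (PIC, or HP-UX above) forces any
   relocated data into writable sections.  */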
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

#if TARGET_ABI_OPEN_VMS
  if (decl && DECL_ATTRIBUTES (decl)
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
    flags |= SECTION_VMS_OVERLAY;
#endif

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this_rtx, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
        {
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
          if (satisfies_constraint_I (vcall_offset_rtx))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
        {
          if (!satisfies_constraint_J (vcall_offset_rtx))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
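      /* Net effect of the adjustments above (a sketch, not literal code):

           this += delta;                                  // constant part
           this += *(ptrdiff_t *) (*(char **) this + vcall_offset);

         i.e. the usual thunk adjustment, done with the ptr_extend
         variants when pointers are 32 bits wide (TARGET_ILP32).  */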
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}
/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode, 3,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
/* Implement overriding of the optimization options.  */
static void
ia64_option_optimization (int level ATTRIBUTE_UNUSED,
                          int size ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
static void
ia64_option_default_params (void)
{
  /* Let the scheduler form additional regions.  */
  set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS, 2);

  /* Set the default values for cache-related parameters.  */
  set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6);
  set_default_param_value (PARAM_L1_CACHE_LINE_SIZE, 32);

  set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST, 4);
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                  tree name ATTRIBUTE_UNUSED,
                                  tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

static enum machine_mode
ia64_promote_function_mode (const_tree type,
                            enum machine_mode mode,
                            int *punsignedp,
                            const_tree funtype,
                            int for_return)
{
  /* Special processing required for OpenVMS ...  */

  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode(type, mode, punsignedp, funtype,
                                         for_return);

  /* HP OpenVMS Calling Standard dated June, 2004, that describes
     HP OpenVMS I64 Version 8.2EFT,
     chapter 4 "OpenVMS I64 Conventions"
     section 4.7 "Procedure Linkage"
     subsection 4.7.5.2, "Normal Register Parameters"

     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
     values passed in registers are zero-filled; signed integral values as
     well as unsigned 32-bit integral values are sign-extended to 64 bits.
     For all other types passed in the general registers, unused bits are
     undefined."  */

  if (!AGGREGATE_TYPE_P (type)
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    {
      if (mode == SImode)
        *punsignedp = 0;
      return DImode;
    }
  else
    return promote_mode (type, mode, punsignedp);
}
static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

#include "gt-ia64.h"