/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010, 2011 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorg.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);
static void ia64_option_override (void);
static void ia64_option_default_params (void);
static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (CUMULATIVE_ARGS *,
				       enum machine_mode, const_tree, bool);
static void ia64_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				       const_tree, bool);
static unsigned int ia64_function_arg_boundary (enum machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (enum machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (enum machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static void ia64_asm_unwind_emit (FILE *, rtx);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);
static enum unwind_info_type ia64_except_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
						     enum machine_mode,
						     int *,
						     const_tree,
						     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);

static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static enum machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL,	       0, 0, false, false, false, NULL }
};
/* Implement overriding of the optimization options.  */
static const struct default_options ia64_option_optimization_table[] =
  {
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
    SUBTARGET_OPTIMIZATION_OPTIONS,
#endif
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE ia64_option_optimization_table
#undef TARGET_OPTION_DEFAULT_PARAMS
#define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
553 #define TARGET_FUNCTION_VALUE ia64_function_value
554 #undef TARGET_LIBCALL_VALUE
555 #define TARGET_LIBCALL_VALUE ia64_libcall_value
556 #undef TARGET_FUNCTION_VALUE_REGNO_P
557 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
559 #undef TARGET_STRUCT_VALUE_RTX
560 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
561 #undef TARGET_RETURN_IN_MEMORY
562 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
565 #undef TARGET_STRICT_ARGUMENT_NAMING
566 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
567 #undef TARGET_MUST_PASS_IN_STACK
568 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
569 #undef TARGET_GET_RAW_RESULT_MODE
570 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
571 #undef TARGET_GET_RAW_ARG_MODE
572 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
574 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
575 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
577 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
578 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
579 #undef TARGET_ASM_UNWIND_EMIT
580 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
581 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
582 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
583 #undef TARGET_ASM_INIT_SECTIONS
584 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
586 #undef TARGET_DEBUG_UNWIND_INFO
587 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
588 #undef TARGET_EXCEPT_UNWIND_INFO
589 #define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info
591 #undef TARGET_SCALAR_MODE_SUPPORTED_P
592 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
593 #undef TARGET_VECTOR_MODE_SUPPORTED_P
594 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
596 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
597 in an order different from the specified program order. */
598 #undef TARGET_RELAXED_ORDERING
599 #define TARGET_RELAXED_ORDERING true
601 #undef TARGET_DEFAULT_TARGET_FLAGS
602 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
603 #undef TARGET_HANDLE_OPTION
604 #define TARGET_HANDLE_OPTION ia64_handle_option
606 #undef TARGET_CANNOT_FORCE_CONST_MEM
607 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE ia64_mangle_type
612 #undef TARGET_INVALID_CONVERSION
613 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
614 #undef TARGET_INVALID_UNARY_OP
615 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
616 #undef TARGET_INVALID_BINARY_OP
617 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
619 #undef TARGET_C_MODE_FOR_SUFFIX
620 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
622 #undef TARGET_CAN_ELIMINATE
623 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
625 #undef TARGET_TRAMPOLINE_INIT
626 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
628 #undef TARGET_INVALID_WITHIN_DOLOOP
629 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
631 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
632 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
634 #undef TARGET_PREFERRED_RELOAD_CLASS
635 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
  ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    addr_area = ADDR_AREA_SMALL;
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* As the common_object attribute sets DECL_SECTION_NAME, check it
     before looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	       size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}
/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
				tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    gcc_unreachable ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      gcc_unreachable ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
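
/* Worked example: for (x << 8) & 0xff00 combine passes rop = 0xff00
   and rshift = 8.  Then op >>= 8 leaves 0xff, and
   exact_log2 (0xff + 1) = 8, an 8-bit deposit field.  A mask such as
   0xf0f00 with the same shift leaves op + 1 a non-power-of-two, so
   exact_log2 returns -1 and the caller rejects the dep.z form.  */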
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
	  || GET_MODE (x) == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, GET_MODE (op))
	      || function_operand (op, GET_MODE (op)))
	    return true;
	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      {
	enum machine_mode mode = GET_MODE (x);

	if (mode == V2SFmode)
	  return satisfies_constraint_Y (x);

	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		&& GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
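	  /* ((v & 0x3fff) ^ 0x2000) - 0x2000 sign-extends the low 14
	     bits of v, so LO fits the signed 14-bit immediate of an
	     "adds" and HI = v - LO folds into the symbolic address;
	     e.g. v = 0x2100 gives lo = -0x1f00, hi = 0x4000.  */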
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
	}
    }

  return true;
}
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
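
/* Summary of the sequences selected above: global- and local-dynamic
   call __tls_get_addr with a dtpmod/dtprel pair (local-dynamic shares
   one call per module and applies dtprel offsets afterwards);
   initial-exec loads the tprel offset from the GOT and adds the
   thread pointer (r13); local-exec applies the tprel offset directly
   against r13.  */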
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
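
/* Illustrative shape of the result for a TImode load from [r14] into
   r16:r17 in the common (not reversed) case:

	ld8 r16 = [r14], 8	// POST_INC
	ld8 r17 = [r14], -8	// POST_DEC restores r14

   When DEAD is true the second postmodify is omitted and r14 is left
   pointing at the high word.  */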
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}
      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16, 0);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
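
/* Example of the magic composition: LE is QCMP_LT|QCMP_EQ|QCMP_INV
   = 8 + 4 + 1 = 13, so the _U_Qfcmp result is nonzero exactly when
   a <= b, and the NE test against zero recovers the BImode
   predicate.  */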
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = GEN_INT (0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, op0, op1)));

	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
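
/* The identities behind the GTU tricks above: subtracting 0x80000000
   from both V2SI operands flips their sign bits, so x >u y iff
   (x - 0x80000000) >s (y - 0x80000000); and for the saturating form,
   x >u y iff the unsigned saturating difference x - y is nonzero,
   hence the EQ-against-zero compare with NEGATE flipped.  */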
/* Emit an integral vector conditional move.  */

static void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
	{
	  emit_move_insn (operands[0], ot);
	  return;
	}

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
			   rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }
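
  /* The identity used above: UMAX (a, b) = b + US_MINUS (a, b), since
     the unsigned saturating difference is a - b when a > b and 0
     otherwise.  */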
  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
1975 /* Emit an integral vector widening sum operations. */
1978 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1981 enum machine_mode wmode, mode;
1982 rtx (*unpack_l) (rtx, rtx, rtx);
1983 rtx (*unpack_h) (rtx, rtx, rtx);
1984 rtx (*plus) (rtx, rtx, rtx);
1986 wmode = GET_MODE (operands[0]);
1987 mode = GET_MODE (operands[1]);
1992 unpack_l = gen_unpack1_l;
1993 unpack_h = gen_unpack1_h;
1994 plus = gen_addv4hi3;
1997 unpack_l = gen_unpack2_l;
1998 unpack_h = gen_unpack2_h;
1999 plus = gen_addv2si3;
2005 /* Fill in x with the sign extension of each element in op1. */
2007 x = CONST0_RTX (mode);
2012 x = gen_reg_rtx (mode);
2014 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
2019 l = gen_reg_rtx (wmode);
2020 h = gen_reg_rtx (wmode);
2021 s = gen_reg_rtx (wmode);
2023 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
2024 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
2025 emit_insn (plus (s, l, operands[2]));
2026 emit_insn (plus (operands[0], h, s));
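/* Illustrative sketch, not part of the expander: a scalar C model of the
   signed V8QI -> V4HI widening sum above, assuming little-endian lane
   numbering (hypothetical function name).  Each wide lane accumulates the
   two narrow elements that unpack into it plus the old accumulator lane;
   the unsigned variant merely zero-extends instead of sign-extending.  */
static void ATTRIBUTE_UNUSED
ia64_doc_widen_sum_model (short dest[4], const signed char src[8],
			  const short acc[4])
{
  int i;
  for (i = 0; i < 4; i++)
    dest[i] = (short) (acc[i] + src[i] + src[i + 4]);
}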
2029 /* Emit a signed or unsigned V8QI dot product operation. */
2032 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2034 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
2036 /* Fill in x1 and x2 with the sign extension of each element. */
2038 x1 = x2 = CONST0_RTX (V8QImode);
2043 x1 = gen_reg_rtx (V8QImode);
2044 x2 = gen_reg_rtx (V8QImode);
2046 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
2047 CONST0_RTX (V8QImode));
2049 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
2050 CONST0_RTX (V8QImode));
2054 l1 = gen_reg_rtx (V4HImode);
2055 l2 = gen_reg_rtx (V4HImode);
2056 h1 = gen_reg_rtx (V4HImode);
2057 h2 = gen_reg_rtx (V4HImode);
2059 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
2060 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
2061 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
2062 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
2064 p1 = gen_reg_rtx (V2SImode);
2065 p2 = gen_reg_rtx (V2SImode);
2066 p3 = gen_reg_rtx (V2SImode);
2067 p4 = gen_reg_rtx (V2SImode);
2068 emit_insn (gen_pmpy2_r (p1, l1, l2));
2069 emit_insn (gen_pmpy2_l (p2, l1, l2));
2070 emit_insn (gen_pmpy2_r (p3, h1, h2));
2071 emit_insn (gen_pmpy2_l (p4, h1, h2));
2073 s1 = gen_reg_rtx (V2SImode);
2074 s2 = gen_reg_rtx (V2SImode);
2075 s3 = gen_reg_rtx (V2SImode);
2076 emit_insn (gen_addv2si3 (s1, p1, p2));
2077 emit_insn (gen_addv2si3 (s2, p3, p4));
2078 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2079 emit_insn (gen_addv2si3 (operands[0], s2, s3));
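/* Illustrative sketch, not part of the expander: a scalar C model of the
   whole dot-product sequence above (hypothetical function name).  The two
   SI lanes of the real result each hold a partial sum of four of the eight
   byte products; only their total is specified, so the model folds
   everything into lane 0.  */
static void ATTRIBUTE_UNUSED
ia64_doc_dot_prod_model (int dest[2], const signed char a[8],
			 const signed char b[8], const int acc[2])
{
  int i, sum = acc[0] + acc[1];
  for (i = 0; i < 8; i++)
    sum += a[i] * b[i];
  dest[0] = sum;
  dest[1] = 0;
}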
2082 /* Emit the appropriate sequence for a call. */
2085 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2090 addr = XEXP (addr, 0);
2091 addr = convert_memory_address (DImode, addr);
2092 b0 = gen_rtx_REG (DImode, R_BR (0));
2094 /* ??? Should do this for functions known to bind local too. */
2095 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2098 insn = gen_sibcall_nogp (addr);
2100 insn = gen_call_nogp (addr, b0);
2102 insn = gen_call_value_nogp (retval, addr, b0);
2103 insn = emit_call_insn (insn);
2108 insn = gen_sibcall_gp (addr);
2110 insn = gen_call_gp (addr, b0);
2112 insn = gen_call_value_gp (retval, addr, b0);
2113 insn = emit_call_insn (insn);
2115 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2119 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2121 if (TARGET_ABI_OPEN_VMS)
2122 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2123 gen_rtx_REG (DImode, GR_REG (25)));
2127 reg_emitted (enum ia64_frame_regs r)
2129 if (emitted_frame_related_regs[r] == 0)
2130 emitted_frame_related_regs[r] = current_frame_info.r[r];
2132 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2136 get_reg (enum ia64_frame_regs r)
2139 return current_frame_info.r[r];
2143 is_emitted (int regno)
2147 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2148 if (emitted_frame_related_regs[r] == regno)
2154 ia64_reload_gp (void)
2158 if (current_frame_info.r[reg_save_gp])
2160 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2164 HOST_WIDE_INT offset;
2167 offset = (current_frame_info.spill_cfa_off
2168 + current_frame_info.spill_size);
2169 if (frame_pointer_needed)
2171 tmp = hard_frame_pointer_rtx;
2176 tmp = stack_pointer_rtx;
2177 offset = current_frame_info.total_size - offset;
2180 offset_r = GEN_INT (offset);
2181 if (satisfies_constraint_I (offset_r))
2182 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2185 emit_move_insn (pic_offset_table_rtx, offset_r);
2186 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2187 pic_offset_table_rtx, tmp));
2190 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2193 emit_move_insn (pic_offset_table_rtx, tmp);
2197 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2198 rtx scratch_b, int noreturn_p, int sibcall_p)
2201 bool is_desc = false;
2203 /* If we find we're calling through a register, then we're actually
2204 calling through a descriptor, so load up the values. */
2205 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2210 /* ??? We are currently constrained to *not* use peep2, because
2211 we can legitimately change the global lifetime of the GP
2212 (in the form of killing where previously live). This is
2213 because a call through a descriptor doesn't use the previous
2214 value of the GP, while a direct call does, and we do not
2215 commit to either form until the split here.
2217 That said, this means that we lack precise life info for
2218 whether ADDR is dead after this call. This is not terribly
2219 important, since we can fix things up essentially for free
2220 with the POST_DEC below, but it's nice to not use it when we
2221 can immediately tell it's not necessary. */
2222 addr_dead_p = ((noreturn_p || sibcall_p
2223 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2225 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2227 /* Load the code address into scratch_b. */
2228 tmp = gen_rtx_POST_INC (Pmode, addr);
2229 tmp = gen_rtx_MEM (Pmode, tmp);
2230 emit_move_insn (scratch_r, tmp);
2231 emit_move_insn (scratch_b, scratch_r);
2233 /* Load the GP address. If ADDR is not dead here, then we must
2234 revert the change made above via the POST_INCREMENT. */
2236 tmp = gen_rtx_POST_DEC (Pmode, addr);
2239 tmp = gen_rtx_MEM (Pmode, tmp);
2240 emit_move_insn (pic_offset_table_rtx, tmp);
2247 insn = gen_sibcall_nogp (addr);
2249 insn = gen_call_value_nogp (retval, addr, retaddr);
2251 insn = gen_call_nogp (addr, retaddr);
2252 emit_call_insn (insn);
2254 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2258 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2260 This differs from the generic code in that we know about the zero-extending
2261 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2262 also know that ld.acq+cmpxchg.rel equals a full barrier.
2264 The loop we want to generate looks like
2269 new_reg = cmp_reg op val;
2270 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2271 if (cmp_reg != old_reg)
2274 Note that we only do the plain load from memory once. Subsequent
2275 iterations use the value loaded by the compare-and-swap pattern. */
2278 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2279 rtx old_dst, rtx new_dst)
2281 enum machine_mode mode = GET_MODE (mem);
2282 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2283 enum insn_code icode;
2285 /* Special case for using fetchadd. */
2286 if ((mode == SImode || mode == DImode)
2287 && (code == PLUS || code == MINUS)
2288 && fetchadd_operand (val, mode))
2291 val = GEN_INT (-INTVAL (val));
2294 old_dst = gen_reg_rtx (mode);
2296 emit_insn (gen_memory_barrier ());
2299 icode = CODE_FOR_fetchadd_acq_si;
2301 icode = CODE_FOR_fetchadd_acq_di;
2302 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2306 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2308 if (new_reg != new_dst)
2309 emit_move_insn (new_dst, new_reg);
2314 /* Because of the volatile mem read, we get an ld.acq, which is the
2315 front half of the full barrier. The end half is the cmpxchg.rel. */
2316 gcc_assert (MEM_VOLATILE_P (mem));
2318 old_reg = gen_reg_rtx (DImode);
2319 cmp_reg = gen_reg_rtx (DImode);
2320 label = gen_label_rtx ();
2324 val = simplify_gen_subreg (DImode, val, mode, 0);
2325 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2328 emit_move_insn (cmp_reg, mem);
2332 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2333 emit_move_insn (old_reg, cmp_reg);
2334 emit_move_insn (ar_ccv, cmp_reg);
2337 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2342 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2343 true, OPTAB_DIRECT);
2344 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2347 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2348 true, OPTAB_DIRECT);
2351 new_reg = gen_lowpart (mode, new_reg);
2353 emit_move_insn (new_dst, new_reg);
2357 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2358 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2359 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2360 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2365 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2367 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
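/* Illustrative sketch, not compiler code: the loop emitted above behaves
   like this C source for the PLUS case, with GCC's __sync builtin standing
   in for the cmpxchg.rel pattern (hypothetical function name).  Note that
   the plain load happens only once, before the loop.  */
static unsigned long ATTRIBUTE_UNUSED
ia64_doc_atomic_add_model (volatile unsigned long *mem, unsigned long val)
{
  unsigned long old, cmp = *mem;
  do
    {
      old = cmp;
      cmp = __sync_val_compare_and_swap (mem, old, old + val);
    }
  while (cmp != old);
  return old;
}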
2370 /* Begin the assembly file. */
2373 ia64_file_start (void)
2375 /* Variable tracking should be run after all optimizations which change order
2376 of insns. It also needs a valid CFG. This can't be done in
2377 ia64_option_override, because flag_var_tracking is finalized after option processing. */
2379 ia64_flag_var_tracking = flag_var_tracking;
2380 flag_var_tracking = 0;
2382 default_file_start ();
2383 emit_safe_across_calls ();
2387 emit_safe_across_calls (void)
2389 unsigned int rs, re;
2396 while (rs < 64 && call_used_regs[PR_REG (rs)])
2400 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2404 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2408 fputc (',', asm_out_file);
2410 fprintf (asm_out_file, "p%u", rs);
2412 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2416 fputc ('\n', asm_out_file);
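/* For instance, if the predicate registers p1-p5 and p16-p63 happen to be
   preserved across calls, the loop above emits (hypothetical ranges):

	.pred.safe_across_calls p1-p5,p16-p63  */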
2419 /* Globalize a declaration. */
2422 ia64_globalize_decl_name (FILE * stream, tree decl)
2424 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2425 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2428 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2429 const char *p = TREE_STRING_POINTER (v);
2430 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2432 targetm.asm_out.globalize_label (stream, name);
2433 if (TREE_CODE (decl) == FUNCTION_DECL)
2434 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
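/* For instance, a function foo carrying version_id "1.0" would emit
   (hypothetical symbol and version string):

	.alias foo#, "foo{1.0}"  */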
2437 /* Helper function for ia64_compute_frame_size: find an appropriate general
2438 register to spill some special register to. SPECIAL_SPILL_MASK contains
2439 bits in GR0 to GR31 that have already been allocated by this routine.
2440 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2443 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2447 if (emitted_frame_related_regs[r] != 0)
2449 regno = emitted_frame_related_regs[r];
2450 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2451 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2452 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2453 else if (current_function_is_leaf
2454 && regno >= GR_REG (1) && regno <= GR_REG (31))
2455 current_frame_info.gr_used_mask |= 1 << regno;
2460 /* If this is a leaf function, first try an otherwise unused
2461 call-clobbered register. */
2462 if (current_function_is_leaf)
2464 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2465 if (! df_regs_ever_live_p (regno)
2466 && call_used_regs[regno]
2467 && ! fixed_regs[regno]
2468 && ! global_regs[regno]
2469 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2470 && ! is_emitted (regno))
2472 current_frame_info.gr_used_mask |= 1 << regno;
2479 regno = current_frame_info.n_local_regs;
2480 /* If there is a frame pointer, then we can't use loc79, because
2481 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2482 reg_name switching code in ia64_expand_prologue. */
2483 while (regno < (80 - frame_pointer_needed))
2484 if (! is_emitted (LOC_REG (regno++)))
2486 current_frame_info.n_local_regs = regno;
2487 return LOC_REG (regno - 1);
2491 /* Failed to find a general register to spill to. Must use stack. */
2495 /* In order to make for nice schedules, we try to allocate every temporary
2496 to a different register. We must of course stay away from call-saved,
2497 fixed, and global registers. We must also stay away from registers
2498 allocated in current_frame_info.gr_used_mask, since those include regs
2499 used all through the prologue.
2501 Any register allocated here must be used immediately. The idea is to
2502 aid scheduling, not to solve data flow problems. */
2504 static int last_scratch_gr_reg;
2507 next_scratch_gr_reg (void)
2511 for (i = 0; i < 32; ++i)
2513 regno = (last_scratch_gr_reg + i + 1) & 31;
2514 if (call_used_regs[regno]
2515 && ! fixed_regs[regno]
2516 && ! global_regs[regno]
2517 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2519 last_scratch_gr_reg = regno;
2524 /* There must be _something_ available. */
2528 /* Helper function for ia64_compute_frame_size, called through
2529 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2532 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2534 unsigned int regno = REGNO (reg);
2537 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2538 for (i = 0; i < n; ++i)
2539 current_frame_info.gr_used_mask |= 1 << (regno + i);
2544 /* Returns the number of bytes offset between the frame pointer and the stack
2545 pointer for the current function. SIZE is the number of bytes of space
2546 needed for local variables. */
2549 ia64_compute_frame_size (HOST_WIDE_INT size)
2551 HOST_WIDE_INT total_size;
2552 HOST_WIDE_INT spill_size = 0;
2553 HOST_WIDE_INT extra_spill_size = 0;
2554 HOST_WIDE_INT pretend_args_size;
2557 int spilled_gr_p = 0;
2558 int spilled_fr_p = 0;
2564 if (current_frame_info.initialized)
2567 memset (&current_frame_info, 0, sizeof current_frame_info);
2568 CLEAR_HARD_REG_SET (mask);
2570 /* Don't allocate scratches to the return register. */
2571 diddle_return_value (mark_reg_gr_used_mask, NULL);
2573 /* Don't allocate scratches to the EH scratch registers. */
2574 if (cfun->machine->ia64_eh_epilogue_sp)
2575 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2576 if (cfun->machine->ia64_eh_epilogue_bsp)
2577 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2579 /* Find the size of the register stack frame. We have only 80 local
2580 registers, because we reserve 8 for the inputs and 8 for the outputs. */
2583 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2584 since we'll be adjusting that down later. */
2585 regno = LOC_REG (78) + ! frame_pointer_needed;
2586 for (; regno >= LOC_REG (0); regno--)
2587 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2589 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2591 /* For functions marked with the syscall_linkage attribute, we must mark
2592 all eight input registers as in use, so that locals aren't visible to the caller. */
2595 if (cfun->machine->n_varargs > 0
2596 || lookup_attribute ("syscall_linkage",
2597 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2598 current_frame_info.n_input_regs = 8;
2601 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2602 if (df_regs_ever_live_p (regno))
2604 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2607 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2608 if (df_regs_ever_live_p (regno))
2610 i = regno - OUT_REG (0) + 1;
2612 #ifndef PROFILE_HOOK
2613 /* When -p profiling, we need one output register for the mcount argument.
2614 Likewise for -a profiling for the bb_init_func argument. For -ax
2615 profiling, we need two output registers for the two bb_init_trace_func arguments. */
2620 current_frame_info.n_output_regs = i;
2622 /* ??? No rotating register support yet. */
2623 current_frame_info.n_rotate_regs = 0;
2625 /* Discover which registers need spilling, and how much room that
2626 will take. Begin with floating point and general registers,
2627 which will always wind up on the stack. */
2629 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2630 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2632 SET_HARD_REG_BIT (mask, regno);
2638 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2639 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2641 SET_HARD_REG_BIT (mask, regno);
2647 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2648 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2650 SET_HARD_REG_BIT (mask, regno);
2655 /* Now come all special registers that might get saved in other
2656 general registers. */
2658 if (frame_pointer_needed)
2660 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2661 /* If we did not get a register, then we take LOC79. This is guaranteed
2662 to be free, even if regs_ever_live is already set, because this is
2663 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2664 as we don't count loc79 above. */
2665 if (current_frame_info.r[reg_fp] == 0)
2667 current_frame_info.r[reg_fp] = LOC_REG (79);
2668 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2672 if (! current_function_is_leaf)
2674 /* Emit a save of BR0 if we call other functions. Do this even
2675 if this function doesn't return, as EH depends on this to be
2676 able to unwind the stack. */
2677 SET_HARD_REG_BIT (mask, BR_REG (0));
2679 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2680 if (current_frame_info.r[reg_save_b0] == 0)
2682 extra_spill_size += 8;
2686 /* Similarly for ar.pfs. */
2687 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2688 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2689 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2691 extra_spill_size += 8;
2695 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2696 registers are clobbered, so we fall back to the stack. */
2697 current_frame_info.r[reg_save_gp]
2698 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2699 if (current_frame_info.r[reg_save_gp] == 0)
2701 SET_HARD_REG_BIT (mask, GR_REG (1));
2708 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2710 SET_HARD_REG_BIT (mask, BR_REG (0));
2711 extra_spill_size += 8;
2715 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2717 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2718 current_frame_info.r[reg_save_ar_pfs]
2719 = find_gr_spill (reg_save_ar_pfs, 1);
2720 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2722 extra_spill_size += 8;
2728 /* Unwind descriptor hackery: things are most efficient if we allocate
2729 consecutive GR save registers for RP, PFS, FP in that order. However,
2730 it is absolutely critical that FP get the only hard register that's
2731 guaranteed to be free, so we allocated it first. If all three did
2732 happen to be allocated hard regs, and are consecutive, rearrange them
2733 into the preferred order now.
2735 If we have already emitted code for any of those registers,
2736 then it's already too late to change. */
2737 min_regno = MIN (current_frame_info.r[reg_fp],
2738 MIN (current_frame_info.r[reg_save_b0],
2739 current_frame_info.r[reg_save_ar_pfs]));
2740 max_regno = MAX (current_frame_info.r[reg_fp],
2741 MAX (current_frame_info.r[reg_save_b0],
2742 current_frame_info.r[reg_save_ar_pfs]));
2744 && min_regno + 2 == max_regno
2745 && (current_frame_info.r[reg_fp] == min_regno + 1
2746 || current_frame_info.r[reg_save_b0] == min_regno + 1
2747 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2748 && (emitted_frame_related_regs[reg_save_b0] == 0
2749 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2750 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2751 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2752 && (emitted_frame_related_regs[reg_fp] == 0
2753 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2755 current_frame_info.r[reg_save_b0] = min_regno;
2756 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2757 current_frame_info.r[reg_fp] = min_regno + 2;
2760 /* See if we need to store the predicate register block. */
2761 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2762 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2764 if (regno <= PR_REG (63))
2766 SET_HARD_REG_BIT (mask, PR_REG (0));
2767 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2768 if (current_frame_info.r[reg_save_pr] == 0)
2770 extra_spill_size += 8;
2774 /* ??? Mark them all as used so that register renaming and such
2775 are free to use them. */
2776 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2777 df_set_regs_ever_live (regno, true);
2780 /* If we're forced to use st8.spill, we're forced to save and restore
2781 ar.unat as well. The check for existing liveness allows inline asm
2782 to touch ar.unat. */
2783 if (spilled_gr_p || cfun->machine->n_varargs
2784 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2786 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2787 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2788 current_frame_info.r[reg_save_ar_unat]
2789 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2790 if (current_frame_info.r[reg_save_ar_unat] == 0)
2792 extra_spill_size += 8;
2797 if (df_regs_ever_live_p (AR_LC_REGNUM))
2799 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2800 current_frame_info.r[reg_save_ar_lc]
2801 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2802 if (current_frame_info.r[reg_save_ar_lc] == 0)
2804 extra_spill_size += 8;
2809 /* If we have an odd number of words of pretend arguments written to
2810 the stack, then the FR save area will be unaligned. We round the
2811 size of this area up to keep things 16 byte aligned. */
2813 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2815 pretend_args_size = crtl->args.pretend_args_size;
2817 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2818 + crtl->outgoing_args_size);
2819 total_size = IA64_STACK_ALIGN (total_size);
2821 /* We always use the 16-byte scratch area provided by the caller, but
2822 if we are a leaf function, there's no one to whom we need to provide a scratch area. */
2824 if (current_function_is_leaf)
2825 total_size = MAX (0, total_size - 16);
2827 current_frame_info.total_size = total_size;
2828 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2829 current_frame_info.spill_size = spill_size;
2830 current_frame_info.extra_spill_size = extra_spill_size;
2831 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2832 current_frame_info.n_spilled = n_spilled;
2833 current_frame_info.initialized = reload_completed;
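/* A worked example with hypothetical numbers: 32 bytes of spills, no extra
   spills, 40 bytes of locals, no pretend args and 16 bytes of outgoing args
   give total_size = IA64_STACK_ALIGN (32 + 0 + 40 + 0 + 16) = 96; a leaf
   function then drops the caller's 16-byte scratch area, leaving 80.  */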
2836 /* Worker function for TARGET_CAN_ELIMINATE. */
2839 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2841 return (to == BR_REG (0) ? current_function_is_leaf : true);
2844 /* Compute the initial difference between the specified pair of registers. */
2847 ia64_initial_elimination_offset (int from, int to)
2849 HOST_WIDE_INT offset;
2851 ia64_compute_frame_size (get_frame_size ());
2854 case FRAME_POINTER_REGNUM:
2857 case HARD_FRAME_POINTER_REGNUM:
2858 if (current_function_is_leaf)
2859 offset = -current_frame_info.total_size;
2861 offset = -(current_frame_info.total_size
2862 - crtl->outgoing_args_size - 16);
2865 case STACK_POINTER_REGNUM:
2866 if (current_function_is_leaf)
2869 offset = 16 + crtl->outgoing_args_size;
2877 case ARG_POINTER_REGNUM:
2878 /* Arguments start above the 16 byte save area, unless stdarg,
2879 in which case we store through the 16 byte save area. */
2882 case HARD_FRAME_POINTER_REGNUM:
2883 offset = 16 - crtl->args.pretend_args_size;
2886 case STACK_POINTER_REGNUM:
2887 offset = (current_frame_info.total_size
2888 + 16 - crtl->args.pretend_args_size);
2903 /* If there are more than a trivial number of register spills, we use
2904 two interleaved iterators so that we can get two memory references at a time.
2907 In order to simplify things in the prologue and epilogue expanders,
2908 we use helper functions to fix up the memory references after the
2909 fact with the appropriate offsets to a POST_MODIFY memory mode.
2910 The following data structure tracks the state of the two iterators
2911 while insns are being emitted. */
2913 struct spill_fill_data
2915 rtx init_after; /* point at which to emit initializations */
2916 rtx init_reg[2]; /* initial base register */
2917 rtx iter_reg[2]; /* the iterator registers */
2918 rtx *prev_addr[2]; /* address of last memory use */
2919 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2920 HOST_WIDE_INT prev_off[2]; /* last offset */
2921 int n_iter; /* number of iterators in use */
2922 int next_iter; /* next iterator to use */
2923 unsigned int save_gr_used_mask;
2926 static struct spill_fill_data spill_fill_data;
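/* An illustrative sketch of the interleaving (assumed schedule, not emitted
   verbatim): with two iterator registers, say r2 and r3, successive spills
   alternate between them, so adjacent stores can issue to both memory
   ports in the same cycle:

	st8 [r2] = r32, 16	// iterator 0, POST_MODIFY by 16
	st8 [r3] = r33, 16	// iterator 1
	st8 [r2] = r34, 16
	st8 [r3] = r35, 16  */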
2929 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2933 spill_fill_data.init_after = get_last_insn ();
2934 spill_fill_data.init_reg[0] = init_reg;
2935 spill_fill_data.init_reg[1] = init_reg;
2936 spill_fill_data.prev_addr[0] = NULL;
2937 spill_fill_data.prev_addr[1] = NULL;
2938 spill_fill_data.prev_insn[0] = NULL;
2939 spill_fill_data.prev_insn[1] = NULL;
2940 spill_fill_data.prev_off[0] = cfa_off;
2941 spill_fill_data.prev_off[1] = cfa_off;
2942 spill_fill_data.next_iter = 0;
2943 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2945 spill_fill_data.n_iter = 1 + (n_spills > 2);
2946 for (i = 0; i < spill_fill_data.n_iter; ++i)
2948 int regno = next_scratch_gr_reg ();
2949 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2950 current_frame_info.gr_used_mask |= 1 << regno;
2955 finish_spill_pointers (void)
2957 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2961 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2963 int iter = spill_fill_data.next_iter;
2964 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2965 rtx disp_rtx = GEN_INT (disp);
2968 if (spill_fill_data.prev_addr[iter])
2970 if (satisfies_constraint_N (disp_rtx))
2972 *spill_fill_data.prev_addr[iter]
2973 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2974 gen_rtx_PLUS (DImode,
2975 spill_fill_data.iter_reg[iter],
2977 add_reg_note (spill_fill_data.prev_insn[iter],
2978 REG_INC, spill_fill_data.iter_reg[iter]);
2982 /* ??? Could use register post_modify for loads. */
2983 if (!satisfies_constraint_I (disp_rtx))
2985 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2986 emit_move_insn (tmp, disp_rtx);
2989 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2990 spill_fill_data.iter_reg[iter], disp_rtx));
2993 /* Micro-optimization: if we've created a frame pointer, it's at
2994 CFA 0, which may allow the real iterator to be initialized lower,
2995 slightly increasing parallelism. Also, if there are few saves
2996 it may eliminate the iterator entirely. */
2998 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2999 && frame_pointer_needed)
3001 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3002 set_mem_alias_set (mem, get_varargs_alias_set ());
3010 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3011 spill_fill_data.init_reg[iter]);
3016 if (!satisfies_constraint_I (disp_rtx))
3018 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3019 emit_move_insn (tmp, disp_rtx);
3023 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3024 spill_fill_data.init_reg[iter],
3031 /* Careful for being the first insn in a sequence. */
3032 if (spill_fill_data.init_after)
3033 insn = emit_insn_after (seq, spill_fill_data.init_after);
3036 rtx first = get_insns ();
3038 insn = emit_insn_before (seq, first);
3040 insn = emit_insn (seq);
3042 spill_fill_data.init_after = insn;
3045 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3047 /* ??? Not all of the spills are for varargs, but some of them are.
3048 The rest of the spills belong in an alias set of their own. But
3049 it doesn't actually hurt to include them here. */
3050 set_mem_alias_set (mem, get_varargs_alias_set ());
3052 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3053 spill_fill_data.prev_off[iter] = cfa_off;
3055 if (++iter >= spill_fill_data.n_iter)
3057 spill_fill_data.next_iter = iter;
3063 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3066 int iter = spill_fill_data.next_iter;
3069 mem = spill_restore_mem (reg, cfa_off);
3070 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3071 spill_fill_data.prev_insn[iter] = insn;
3078 RTX_FRAME_RELATED_P (insn) = 1;
3080 /* Don't even pretend that the unwind code can intuit its way
3081 through a pair of interleaved post_modify iterators. Just
3082 provide the correct answer. */
3084 if (frame_pointer_needed)
3086 base = hard_frame_pointer_rtx;
3091 base = stack_pointer_rtx;
3092 off = current_frame_info.total_size - cfa_off;
3095 add_reg_note (insn, REG_CFA_OFFSET,
3096 gen_rtx_SET (VOIDmode,
3097 gen_rtx_MEM (GET_MODE (reg),
3098 plus_constant (base, off)),
3104 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3106 int iter = spill_fill_data.next_iter;
3109 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3110 GEN_INT (cfa_off)));
3111 spill_fill_data.prev_insn[iter] = insn;
3114 /* Wrapper functions that discard the CONST_INT spill offset. These
3115 exist so that we can give gr_spill/gr_fill the offset they need and
3116 use a consistent function interface. */
3119 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3121 return gen_movdi (dest, src);
3125 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3127 return gen_fr_spill (dest, src);
3131 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3133 return gen_fr_restore (dest, src);
3136 /* Called after register allocation to add any instructions needed for the
3137 prologue. Using a prologue insn is favored compared to putting all of the
3138 instructions in output_function_prologue(), since it allows the scheduler
3139 to intermix instructions with the saves of the caller saved registers. In
3140 some cases, it might be necessary to emit a barrier instruction as the last
3141 insn to prevent such scheduling.
3143 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3144 so that the debug info generation code can handle them properly.
3146 The register save area is laid out like so:
3148 [ varargs spill area ]
3149 [ fr register spill area ]
3150 [ br register spill area ]
3151 [ ar register spill area ]
3152 [ pr register spill area ]
3153 [ gr register spill area ] */
3155 /* ??? Get inefficient code when the frame size is larger than can fit in an
3156 adds instruction. */
3159 ia64_expand_prologue (void)
3161 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3162 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3165 ia64_compute_frame_size (get_frame_size ());
3166 last_scratch_gr_reg = 15;
3168 if (flag_stack_usage)
3169 current_function_static_stack_size = current_frame_info.total_size;
3173 fprintf (dump_file, "ia64 frame related registers "
3174 "recorded in current_frame_info.r[]:\n");
3175 #define PRINTREG(a) if (current_frame_info.r[a]) \
3176 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3178 PRINTREG(reg_save_b0);
3179 PRINTREG(reg_save_pr);
3180 PRINTREG(reg_save_ar_pfs);
3181 PRINTREG(reg_save_ar_unat);
3182 PRINTREG(reg_save_ar_lc);
3183 PRINTREG(reg_save_gp);
3187 /* If there is no epilogue, then we don't need some prologue insns.
3188 We need to avoid emitting the dead prologue insns, because flow
3189 will complain about them. */
3195 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3196 if ((e->flags & EDGE_FAKE) == 0
3197 && (e->flags & EDGE_FALLTHRU) != 0)
3199 epilogue_p = (e != NULL);
3204 /* Set the local, input, and output register names. We need to do this
3205 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3206 half. If we use in/loc/out register names, then we get assembler errors
3207 in crtn.S because there is no alloc insn or regstk directive in there. */
3208 if (! TARGET_REG_NAMES)
3210 int inputs = current_frame_info.n_input_regs;
3211 int locals = current_frame_info.n_local_regs;
3212 int outputs = current_frame_info.n_output_regs;
3214 for (i = 0; i < inputs; i++)
3215 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3216 for (i = 0; i < locals; i++)
3217 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3218 for (i = 0; i < outputs; i++)
3219 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3222 /* Set the frame pointer register name. The regnum is logically loc79,
3223 but of course we'll not have allocated that many locals. Rather than
3224 worrying about renumbering the existing rtxs, we adjust the name. */
3225 /* ??? This code means that we can never use one local register when
3226 there is a frame pointer. loc79 gets wasted in this case, as it is
3227 renamed to a register that will never be used. See also the try_locals
3228 code in find_gr_spill. */
3229 if (current_frame_info.r[reg_fp])
3231 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3232 reg_names[HARD_FRAME_POINTER_REGNUM]
3233 = reg_names[current_frame_info.r[reg_fp]];
3234 reg_names[current_frame_info.r[reg_fp]] = tmp;
3237 /* We don't need an alloc instruction if we've used no outputs or locals. */
3238 if (current_frame_info.n_local_regs == 0
3239 && current_frame_info.n_output_regs == 0
3240 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3241 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3243 /* If there is no alloc, but there are input registers used, then we
3244 need a .regstk directive. */
3245 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3246 ar_pfs_save_reg = NULL_RTX;
3250 current_frame_info.need_regstk = 0;
3252 if (current_frame_info.r[reg_save_ar_pfs])
3254 regno = current_frame_info.r[reg_save_ar_pfs];
3255 reg_emitted (reg_save_ar_pfs);
3258 regno = next_scratch_gr_reg ();
3259 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3261 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3262 GEN_INT (current_frame_info.n_input_regs),
3263 GEN_INT (current_frame_info.n_local_regs),
3264 GEN_INT (current_frame_info.n_output_regs),
3265 GEN_INT (current_frame_info.n_rotate_regs)));
3266 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3269 /* Set up frame pointer, stack pointer, and spill iterators. */
3271 n_varargs = cfun->machine->n_varargs;
3272 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3273 stack_pointer_rtx, 0);
3275 if (frame_pointer_needed)
3277 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3278 RTX_FRAME_RELATED_P (insn) = 1;
3280 /* Force the unwind info to recognize this as defining a new CFA,
3281 rather than some temp register setup. */
3282 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3285 if (current_frame_info.total_size != 0)
3287 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3290 if (satisfies_constraint_I (frame_size_rtx))
3291 offset = frame_size_rtx;
3294 regno = next_scratch_gr_reg ();
3295 offset = gen_rtx_REG (DImode, regno);
3296 emit_move_insn (offset, frame_size_rtx);
3299 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3300 stack_pointer_rtx, offset));
3302 if (! frame_pointer_needed)
3304 RTX_FRAME_RELATED_P (insn) = 1;
3305 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3306 gen_rtx_SET (VOIDmode,
3308 gen_rtx_PLUS (DImode,
3313 /* ??? At this point we must generate a magic insn that appears to
3314 modify the stack pointer, the frame pointer, and all spill
3315 iterators. This would allow the most scheduling freedom. For
3316 now, just hard stop. */
3317 emit_insn (gen_blockage ());
3320 /* Must copy out ar.unat before doing any integer spills. */
3321 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3323 if (current_frame_info.r[reg_save_ar_unat])
3326 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3327 reg_emitted (reg_save_ar_unat);
3331 alt_regno = next_scratch_gr_reg ();
3332 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3333 current_frame_info.gr_used_mask |= 1 << alt_regno;
3336 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3337 insn = emit_move_insn (ar_unat_save_reg, reg);
3338 if (current_frame_info.r[reg_save_ar_unat])
3340 RTX_FRAME_RELATED_P (insn) = 1;
3341 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3344 /* Even if we're not going to generate an epilogue, we still
3345 need to save the register so that EH works. */
3346 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3347 emit_insn (gen_prologue_use (ar_unat_save_reg));
3350 ar_unat_save_reg = NULL_RTX;
3352 /* Spill all varargs registers. Do this before spilling any GR registers,
3353 since we want the UNAT bits for the GR registers to override the UNAT
3354 bits from varargs, which we don't care about. */
3357 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3359 reg = gen_rtx_REG (DImode, regno);
3360 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3363 /* Locate the bottom of the register save area. */
3364 cfa_off = (current_frame_info.spill_cfa_off
3365 + current_frame_info.spill_size
3366 + current_frame_info.extra_spill_size);
3368 /* Save the predicate register block either in a register or in memory. */
3369 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3371 reg = gen_rtx_REG (DImode, PR_REG (0));
3372 if (current_frame_info.r[reg_save_pr] != 0)
3374 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3375 reg_emitted (reg_save_pr);
3376 insn = emit_move_insn (alt_reg, reg);
3378 /* ??? Denote pr spill/fill by a DImode move that modifies all
3379 64 hard registers. */
3380 RTX_FRAME_RELATED_P (insn) = 1;
3381 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3383 /* Even if we're not going to generate an epilogue, we still
3384 need to save the register so that EH works. */
3386 emit_insn (gen_prologue_use (alt_reg));
3390 alt_regno = next_scratch_gr_reg ();
3391 alt_reg = gen_rtx_REG (DImode, alt_regno);
3392 insn = emit_move_insn (alt_reg, reg);
3393 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3398 /* Handle AR regs in numerical order. All of them get special handling. */
3399 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3400 && current_frame_info.r[reg_save_ar_unat] == 0)
3402 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3403 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3407 /* The alloc insn already copied ar.pfs into a general register. The
3408 only thing we have to do now is copy that register to a stack slot
3409 if we'd not allocated a local register for the job. */
3410 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3411 && current_frame_info.r[reg_save_ar_pfs] == 0)
3413 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3414 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3418 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3420 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3421 if (current_frame_info.r[reg_save_ar_lc] != 0)
3423 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3424 reg_emitted (reg_save_ar_lc);
3425 insn = emit_move_insn (alt_reg, reg);
3426 RTX_FRAME_RELATED_P (insn) = 1;
3427 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3429 /* Even if we're not going to generate an epilogue, we still
3430 need to save the register so that EH works. */
3432 emit_insn (gen_prologue_use (alt_reg));
3436 alt_regno = next_scratch_gr_reg ();
3437 alt_reg = gen_rtx_REG (DImode, alt_regno);
3438 emit_move_insn (alt_reg, reg);
3439 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3444 /* Save the return pointer. */
3445 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3447 reg = gen_rtx_REG (DImode, BR_REG (0));
3448 if (current_frame_info.r[reg_save_b0] != 0)
3450 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3451 reg_emitted (reg_save_b0);
3452 insn = emit_move_insn (alt_reg, reg);
3453 RTX_FRAME_RELATED_P (insn) = 1;
3454 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3456 /* Even if we're not going to generate an epilogue, we still
3457 need to save the register so that EH works. */
3459 emit_insn (gen_prologue_use (alt_reg));
3463 alt_regno = next_scratch_gr_reg ();
3464 alt_reg = gen_rtx_REG (DImode, alt_regno);
3465 emit_move_insn (alt_reg, reg);
3466 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3471 if (current_frame_info.r[reg_save_gp])
3473 reg_emitted (reg_save_gp);
3474 insn = emit_move_insn (gen_rtx_REG (DImode,
3475 current_frame_info.r[reg_save_gp]),
3476 pic_offset_table_rtx);
3479 /* We should now be at the base of the gr/br/fr spill area. */
3480 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3481 + current_frame_info.spill_size));
3483 /* Spill all general registers. */
3484 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3485 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3487 reg = gen_rtx_REG (DImode, regno);
3488 do_spill (gen_gr_spill, reg, cfa_off, reg);
3492 /* Spill the rest of the BR registers. */
3493 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3494 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3496 alt_regno = next_scratch_gr_reg ();
3497 alt_reg = gen_rtx_REG (DImode, alt_regno);
3498 reg = gen_rtx_REG (DImode, regno);
3499 emit_move_insn (alt_reg, reg);
3500 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3504 /* Align the frame and spill all FR registers. */
3505 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3506 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3508 gcc_assert (!(cfa_off & 15));
3509 reg = gen_rtx_REG (XFmode, regno);
3510 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3514 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3516 finish_spill_pointers ();
3519 /* Output the textual info surrounding the prologue. */
3522 ia64_start_function (FILE *file, const char *fnname,
3523 tree decl ATTRIBUTE_UNUSED)
3525 #if VMS_DEBUGGING_INFO
3527 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3529 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3530 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3531 dwarf2out_vms_debug_main_pointer ();
3536 fputs ("\t.proc ", file);
3537 assemble_name (file, fnname);
3539 ASM_OUTPUT_LABEL (file, fnname);
3542 /* Called after register allocation to add any instructions needed for the
3543 epilogue. Using an epilogue insn is favored compared to putting all of the
3544 instructions in output_function_epilogue(), since it allows the scheduler
3545 to intermix instructions with the saves of the caller saved registers. In
3546 some cases, it might be necessary to emit a barrier instruction as the last
3547 insn to prevent such scheduling. */
3550 ia64_expand_epilogue (int sibcall_p)
3552 rtx insn, reg, alt_reg, ar_unat_save_reg;
3553 int regno, alt_regno, cfa_off;
3555 ia64_compute_frame_size (get_frame_size ());
3557 /* If there is a frame pointer, then we use it instead of the stack
3558 pointer, so that the stack pointer does not need to be valid when
3559 the epilogue starts. See EXIT_IGNORE_STACK. */
3560 if (frame_pointer_needed)
3561 setup_spill_pointers (current_frame_info.n_spilled,
3562 hard_frame_pointer_rtx, 0);
3564 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3565 current_frame_info.total_size);
3567 if (current_frame_info.total_size != 0)
3569 /* ??? At this point we must generate a magic insn that appears to
3570 modify the spill iterators and the frame pointer. This would
3571 allow the most scheduling freedom. For now, just hard stop. */
3572 emit_insn (gen_blockage ());
3575 /* Locate the bottom of the register save area. */
3576 cfa_off = (current_frame_info.spill_cfa_off
3577 + current_frame_info.spill_size
3578 + current_frame_info.extra_spill_size);
3580 /* Restore the predicate registers. */
3581 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3583 if (current_frame_info.r[reg_save_pr] != 0)
3585 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3586 reg_emitted (reg_save_pr);
3590 alt_regno = next_scratch_gr_reg ();
3591 alt_reg = gen_rtx_REG (DImode, alt_regno);
3592 do_restore (gen_movdi_x, alt_reg, cfa_off);
3595 reg = gen_rtx_REG (DImode, PR_REG (0));
3596 emit_move_insn (reg, alt_reg);
3599 /* Restore the application registers. */
3601 /* Load the saved unat from the stack, but do not restore it until
3602 after the GRs have been restored. */
3603 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3605 if (current_frame_info.r[reg_save_ar_unat] != 0)
3608 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3609 reg_emitted (reg_save_ar_unat);
3613 alt_regno = next_scratch_gr_reg ();
3614 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3615 current_frame_info.gr_used_mask |= 1 << alt_regno;
3616 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3621 ar_unat_save_reg = NULL_RTX;
3623 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3625 reg_emitted (reg_save_ar_pfs);
3626 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3627 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3628 emit_move_insn (reg, alt_reg);
3630 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3632 alt_regno = next_scratch_gr_reg ();
3633 alt_reg = gen_rtx_REG (DImode, alt_regno);
3634 do_restore (gen_movdi_x, alt_reg, cfa_off);
3636 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3637 emit_move_insn (reg, alt_reg);
3640 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3642 if (current_frame_info.r[reg_save_ar_lc] != 0)
3644 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3645 reg_emitted (reg_save_ar_lc);
3649 alt_regno = next_scratch_gr_reg ();
3650 alt_reg = gen_rtx_REG (DImode, alt_regno);
3651 do_restore (gen_movdi_x, alt_reg, cfa_off);
3654 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3655 emit_move_insn (reg, alt_reg);
3658 /* Restore the return pointer. */
3659 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3661 if (current_frame_info.r[reg_save_b0] != 0)
3663 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3664 reg_emitted (reg_save_b0);
3668 alt_regno = next_scratch_gr_reg ();
3669 alt_reg = gen_rtx_REG (DImode, alt_regno);
3670 do_restore (gen_movdi_x, alt_reg, cfa_off);
3673 reg = gen_rtx_REG (DImode, BR_REG (0));
3674 emit_move_insn (reg, alt_reg);
3677 /* We should now be at the base of the gr/br/fr spill area. */
3678 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3679 + current_frame_info.spill_size));
3681 /* The GP may be stored on the stack in the prologue, but it's
3682 never restored in the epilogue. Skip the stack slot. */
3683 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3686 /* Restore all general registers. */
3687 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3688 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3690 reg = gen_rtx_REG (DImode, regno);
3691 do_restore (gen_gr_restore, reg, cfa_off);
3695 /* Restore the branch registers. */
3696 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3697 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3699 alt_regno = next_scratch_gr_reg ();
3700 alt_reg = gen_rtx_REG (DImode, alt_regno);
3701 do_restore (gen_movdi_x, alt_reg, cfa_off);
3703 reg = gen_rtx_REG (DImode, regno);
3704 emit_move_insn (reg, alt_reg);
3707 /* Restore floating point registers. */
3708 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3709 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3711 gcc_assert (!(cfa_off & 15));
3712 reg = gen_rtx_REG (XFmode, regno);
3713 do_restore (gen_fr_restore_x, reg, cfa_off);
3717 /* Restore ar.unat for real. */
3718 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3720 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3721 emit_move_insn (reg, ar_unat_save_reg);
3724 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3726 finish_spill_pointers ();
3728 if (current_frame_info.total_size
3729 || cfun->machine->ia64_eh_epilogue_sp
3730 || frame_pointer_needed)
3732 /* ??? At this point we must generate a magic insn that appears to
3733 modify the spill iterators, the stack pointer, and the frame
3734 pointer. This would allow the most scheduling freedom. For now,
3736 emit_insn (gen_blockage ());
3739 if (cfun->machine->ia64_eh_epilogue_sp)
3740 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3741 else if (frame_pointer_needed)
3743 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3744 RTX_FRAME_RELATED_P (insn) = 1;
3745 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3747 else if (current_frame_info.total_size)
3749 rtx offset, frame_size_rtx;
3751 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3752 if (satisfies_constraint_I (frame_size_rtx))
3753 offset = frame_size_rtx;
3756 regno = next_scratch_gr_reg ();
3757 offset = gen_rtx_REG (DImode, regno);
3758 emit_move_insn (offset, frame_size_rtx);
3761 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3764 RTX_FRAME_RELATED_P (insn) = 1;
3765 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3766 gen_rtx_SET (VOIDmode,
3768 gen_rtx_PLUS (DImode,
3773 if (cfun->machine->ia64_eh_epilogue_bsp)
3774 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3777 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3780 int fp = GR_REG (2);
3781 /* We need a throw-away register here; r0 and r1 are reserved,
3782 so r2 is the first available call-clobbered register. If
3783 there was a frame_pointer register, we may have swapped the
3784 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3785 sure we're using the string "r2" when emitting the register
3786 name for the assembler. */
3787 if (current_frame_info.r[reg_fp]
3788 && current_frame_info.r[reg_fp] == GR_REG (2))
3789 fp = HARD_FRAME_POINTER_REGNUM;
3791 /* We must emit an alloc to force the input registers to become output
3792 registers. Otherwise, if the callee tries to pass its parameters
3793 through to another call without an intervening alloc, then these parameters are lost. */
3795 /* ??? We don't need to preserve all input registers. We only need to
3796 preserve those input registers used as arguments to the sibling call.
3797 It is unclear how to compute that number here. */
3798 if (current_frame_info.n_input_regs != 0)
3800 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3801 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3802 const0_rtx, const0_rtx,
3803 n_inputs, const0_rtx));
3804 RTX_FRAME_RELATED_P (insn) = 1;
3809 /* Return 1 if br.ret can do all the work required to return from a function. */
3813 ia64_direct_return (void)
3815 if (reload_completed && ! frame_pointer_needed)
3817 ia64_compute_frame_size (get_frame_size ());
3819 return (current_frame_info.total_size == 0
3820 && current_frame_info.n_spilled == 0
3821 && current_frame_info.r[reg_save_b0] == 0
3822 && current_frame_info.r[reg_save_pr] == 0
3823 && current_frame_info.r[reg_save_ar_pfs] == 0
3824 && current_frame_info.r[reg_save_ar_unat] == 0
3825 && current_frame_info.r[reg_save_ar_lc] == 0);
3830 /* Return the magic cookie that we use to hold the return address
3831 during early compilation. */
3834 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3838 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3841 /* Split this value after reload, now that we know where the return
3842 address is saved. */
3845 ia64_split_return_addr_rtx (rtx dest)
3849 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3851 if (current_frame_info.r[reg_save_b0] != 0)
3853 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3854 reg_emitted (reg_save_b0);
3862 /* Compute offset from CFA for BR0. */
3863 /* ??? Must be kept in sync with ia64_expand_prologue. */
3864 off = (current_frame_info.spill_cfa_off
3865 + current_frame_info.spill_size);
3866 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3867 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3870 /* Convert CFA offset to a register based offset. */
3871 if (frame_pointer_needed)
3872 src = hard_frame_pointer_rtx;
3875 src = stack_pointer_rtx;
3876 off += current_frame_info.total_size;
3879 /* Load address into scratch register. */
3880 off_r = GEN_INT (off);
3881 if (satisfies_constraint_I (off_r))
3882 emit_insn (gen_adddi3 (dest, src, off_r));
3885 emit_move_insn (dest, off_r);
3886 emit_insn (gen_adddi3 (dest, src, dest));
3889 src = gen_rtx_MEM (Pmode, dest);
3893 src = gen_rtx_REG (DImode, BR_REG (0));
3895 emit_move_insn (dest, src);
3899 ia64_hard_regno_rename_ok (int from, int to)
3901 /* Don't clobber any of the registers we reserved for the prologue. */
3904 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3905 if (to == current_frame_info.r[r]
3906 || from == current_frame_info.r[r]
3907 || to == emitted_frame_related_regs[r]
3908 || from == emitted_frame_related_regs[r])
3911 /* Don't use output registers outside the register frame. */
3912 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3915 /* Retain even/oddness on predicate register pairs. */
3916 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3917 return (from & 1) == (to & 1);
3922 /* Target hook for assembling integer objects. Handle word-sized
3923 aligned objects and detect the cases when @fptr is needed. */
3926 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3928 if (size == POINTER_SIZE / BITS_PER_UNIT
3929 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3930 && GET_CODE (x) == SYMBOL_REF
3931 && SYMBOL_REF_FUNCTION_P (x))
3933 static const char * const directive[2][2] = {
3934 /* 64-bit pointer */ /* 32-bit pointer */
3935 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3936 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3938 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3939 output_addr_const (asm_out_file, x);
3940 fputs (")\n", asm_out_file);
3943 return default_assemble_integer (x, size, aligned_p);
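/* For instance, an aligned 64-bit pointer to a function foo assembles as
   (hypothetical symbol):

	data8	@fptr(foo)

   which makes the linker materialize an official function descriptor
   instead of the raw code address.  */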
3946 /* Emit the function prologue. */
3949 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3951 int mask, grsave, grsave_prev;
3953 if (current_frame_info.need_regstk)
3954 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3955 current_frame_info.n_input_regs,
3956 current_frame_info.n_local_regs,
3957 current_frame_info.n_output_regs,
3958 current_frame_info.n_rotate_regs);
3960 if (ia64_except_unwind_info () != UI_TARGET)
3963 /* Emit the .prologue directive. */
3966 grsave = grsave_prev = 0;
3967 if (current_frame_info.r[reg_save_b0] != 0)
3970 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3972 if (current_frame_info.r[reg_save_ar_pfs] != 0
3973 && (grsave_prev == 0
3974 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3977 if (grsave_prev == 0)
3978 grsave = current_frame_info.r[reg_save_ar_pfs];
3979 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3981 if (current_frame_info.r[reg_fp] != 0
3982 && (grsave_prev == 0
3983 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3986 if (grsave_prev == 0)
3987 grsave = HARD_FRAME_POINTER_REGNUM;
3988 grsave_prev = current_frame_info.r[reg_fp];
3990 if (current_frame_info.r[reg_save_pr] != 0
3991 && (grsave_prev == 0
3992 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3995 if (grsave_prev == 0)
3996 grsave = current_frame_info.r[reg_save_pr];
3999 if (mask && TARGET_GNU_AS)
4000 fprintf (file, "\t.prologue %d, %d\n", mask,
4001 ia64_dbx_register_number (grsave));
4003 fputs ("\t.prologue\n", file);
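/* For instance, ".prologue 12, 33" (hypothetical operands) would tell the
   unwinder that rp (mask bit 8) and ar.pfs (mask bit 4) are saved in
   consecutive general registers starting at the one numbered 33.  */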
4005 /* Emit a .spill directive, if necessary, to relocate the base of
4006 the register spill area. */
4007 if (current_frame_info.spill_cfa_off != -16)
4008 fprintf (file, "\t.spill %ld\n",
4009 (long) (current_frame_info.spill_cfa_off
4010 + current_frame_info.spill_size));
4013 /* Emit the .body directive at the scheduled end of the prologue. */
4016 ia64_output_function_end_prologue (FILE *file)
4018 if (ia64_except_unwind_info () != UI_TARGET)
4021 fputs ("\t.body\n", file);
4024 /* Emit the function epilogue. */
4027 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4028 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4032 if (current_frame_info.r[reg_fp])
4034 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4035 reg_names[HARD_FRAME_POINTER_REGNUM]
4036 = reg_names[current_frame_info.r[reg_fp]];
4037 reg_names[current_frame_info.r[reg_fp]] = tmp;
4038 reg_emitted (reg_fp);
4040 if (! TARGET_REG_NAMES)
4042 for (i = 0; i < current_frame_info.n_input_regs; i++)
4043 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4044 for (i = 0; i < current_frame_info.n_local_regs; i++)
4045 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4046 for (i = 0; i < current_frame_info.n_output_regs; i++)
4047 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4050 current_frame_info.initialized = 0;
4054 ia64_dbx_register_number (int regno)
4056 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4057 from its home at loc79 to something inside the register frame. We
4058 must perform the same renumbering here for the debug info. */
4059 if (current_frame_info.r[reg_fp])
4061 if (regno == HARD_FRAME_POINTER_REGNUM)
4062 regno = current_frame_info.r[reg_fp];
4063 else if (regno == current_frame_info.r[reg_fp])
4064 regno = HARD_FRAME_POINTER_REGNUM;
4067 if (IN_REGNO_P (regno))
4068 return 32 + regno - IN_REG (0);
4069 else if (LOC_REGNO_P (regno))
4070 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4071 else if (OUT_REGNO_P (regno))
4072 return (32 + current_frame_info.n_input_regs
4073 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4078 /* Implement TARGET_TRAMPOLINE_INIT.
4080 The trampoline should set the static chain pointer to value placed
4081 into the trampoline and should branch to the specified routine.
4082 To make the normal indirect-subroutine calling convention work,
4083 the trampoline must look like a function descriptor; the first
4084 word being the target address and the second being the target's
4085 global pointer.
4087 We abuse the concept of a global pointer by arranging for it
4088 to point to the data we need to load. The complete trampoline
4089 has the following form:
4091 +-------------------+ \
4092 TRAMP: | __ia64_trampoline | |
4093 +-------------------+  > fake function descriptor
4094 | TRAMP+16          | |
4095 +-------------------+ /
4096 | target descriptor |
4097 +-------------------+
4098 | static link       |
4099 +-------------------+
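/* A stack trampoline is only needed when the address of a nested function
   escapes. Hypothetical C illustration (not part of this file):

	int outer (int x)
	{
	  int inner (int y) { return x + y; }	(uses the static chain)
	  int (*fp) (int) = inner;		(address taken: trampoline)
	  return fp (1);
	}
*/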
4103 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4105 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4106 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4108 /* The Intel assembler requires that the global __ia64_trampoline symbol
4109 be declared explicitly */
4112 static bool declared_ia64_trampoline = false;
4114 if (!declared_ia64_trampoline)
4116 declared_ia64_trampoline = true;
4117 (*targetm.asm_out.globalize_label) (asm_out_file,
4118 "__ia64_trampoline");
4122 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4123 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4124 fnaddr = convert_memory_address (Pmode, fnaddr);
4125 static_chain = convert_memory_address (Pmode, static_chain);
4127 /* Load up our iterator. */
4128 addr_reg = copy_to_reg (addr);
4129 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4131 /* The first two words are the fake descriptor:
4132 __ia64_trampoline, ADDR+16. */
4133 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4134 if (TARGET_ABI_OPEN_VMS)
4136 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4137 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4138 relocation against function symbols to make it identical to the
4139 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4140 strict ELF and dereference to get the bare code address. */
4141 rtx reg = gen_reg_rtx (Pmode);
4142 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4143 emit_move_insn (reg, tramp);
4144 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4147 emit_move_insn (m_tramp, tramp);
4148 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4149 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4151 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4152 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4153 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4155 /* The third word is the target descriptor. */
4156 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4157 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4158 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4160 /* The fourth word is the static chain. */
4161 emit_move_insn (m_tramp, static_chain);
4164 /* Do any needed setup for a variadic function. CUM has not been updated
4165 for the last named argument which has type TYPE and mode MODE.
4167 We generate the actual spill instructions during prologue generation. */
4170 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4171 tree type, int * pretend_size,
4172 int second_time ATTRIBUTE_UNUSED)
4174 CUMULATIVE_ARGS next_cum = *cum;
4176 /* Skip the current argument. */
4177 ia64_function_arg_advance (&next_cum, mode, type, 1);
4179 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4181 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4182 *pretend_size = n * UNITS_PER_WORD;
4183 cfun->machine->n_varargs = n;
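/* Worked example (illustrative): if the named parameters occupy 3 of the
   MAX_ARGUMENT_SLOTS (8) slots, then n = 5 and *pretend_size becomes
   5 * UNITS_PER_WORD = 40 bytes, reserving room for in3..in7 to be
   spilled by the prologue. */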
4187 /* Check whether TYPE is a homogeneous floating point aggregate. If
4188 it is, return the mode of the floating point type that appears
4189 in all leaves. If it is not, return VOIDmode.
4191 An aggregate is a homogeneous floating point aggregate if all
4192 fields/elements in it have the same floating point type (e.g.,
4193 SFmode). 128-bit quad-precision floats are excluded.
4195 Variable sized aggregates should never arrive here, since we should
4196 have already decided to pass them by reference. Top-level zero-sized
4197 aggregates are excluded because our parallels crash the middle-end. */
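/* Examples (illustrative): struct { float x, y, z; } is an HFA with
   element mode SFmode; struct { double d; float f; } is not an HFA,
   since its leaves have different float types; and a struct containing
   a __float128 member is excluded as quad precision. */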
4199 static enum machine_mode
4200 hfa_element_mode (const_tree type, bool nested)
4202 enum machine_mode element_mode = VOIDmode;
4203 enum machine_mode mode;
4204 enum tree_code code = TREE_CODE (type);
4205 int know_element_mode = 0;
4208 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4213 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4214 case BOOLEAN_TYPE: case POINTER_TYPE:
4215 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4216 case LANG_TYPE: case FUNCTION_TYPE:
4219 /* Fortran complex types are supposed to be HFAs, so we need to handle
4220 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4221 types though. */
4223 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4224 && TYPE_MODE (type) != TCmode)
4225 return GET_MODE_INNER (TYPE_MODE (type));
4230 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4231 mode if this is contained within an aggregate. */
4232 if (nested && TYPE_MODE (type) != TFmode)
4233 return TYPE_MODE (type);
4238 return hfa_element_mode (TREE_TYPE (type), 1);
4242 case QUAL_UNION_TYPE:
4243 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4245 if (TREE_CODE (t) != FIELD_DECL)
4248 mode = hfa_element_mode (TREE_TYPE (t), 1);
4249 if (know_element_mode)
4251 if (mode != element_mode)
4254 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4258 know_element_mode = 1;
4259 element_mode = mode;
4262 return element_mode;
4265 /* If we reach here, we probably have some front-end specific type
4266 that the backend doesn't know about. This can happen via the
4267 aggregate_value_p call in init_function_start. All we can do is
4268 ignore unknown tree types. */
4275 /* Return the number of words required to hold a quantity of TYPE and MODE
4276 when passed as an argument. */
4278 ia64_function_arg_words (const_tree type, enum machine_mode mode)
4282 if (mode == BLKmode)
4283 words = int_size_in_bytes (type);
4285 words = GET_MODE_SIZE (mode);
4287 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
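/* Worked example (illustrative): a 12-byte BLKmode struct needs
   (12 + 8 - 1) / 8 = 2 argument words when UNITS_PER_WORD is 8. */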
4290 /* Return the number of registers that should be skipped so the current
4291 argument (described by TYPE and WORDS) will be properly aligned.
4293 Integer and float arguments larger than 8 bytes start at the next
4294 even boundary. Aggregates larger than 8 bytes start at the next
4295 even boundary if the aggregate has 16 byte alignment. Note that
4296 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4297 but are still to be aligned in registers.
4299 ??? The ABI does not specify how to handle aggregates with
4300 alignment from 9 to 15 bytes, or greater than 16. We handle them
4301 all as if they had 16 byte alignment. Such aggregates can occur
4302 only if gcc extensions are used. */
4304 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4305 const_tree type, int words)
4307 /* No registers are skipped on VMS. */
4308 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4312 && TREE_CODE (type) != INTEGER_TYPE
4313 && TREE_CODE (type) != REAL_TYPE)
4314 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
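/* Worked example (illustrative): with cum->words odd, a 16-byte-aligned
   aggregate yields an offset of 1, skipping one slot so the argument
   starts on an even slot; with cum->words even, or on VMS, the offset
   is always 0. */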
4319 /* Return rtx for register where argument is passed, or zero if it is passed
4320 on the stack. */
4321 /* ??? 128-bit quad-precision floats are always passed in general
4322 registers. */
4325 ia64_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
4326 const_tree type, bool named, bool incoming)
4328 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4329 int words = ia64_function_arg_words (type, mode);
4330 int offset = ia64_function_arg_offset (cum, type, words);
4331 enum machine_mode hfa_mode = VOIDmode;
4333 /* For OPEN VMS, emit the instruction setting up the argument register here,
4334 when we know this will be together with the other arguments setup related
4335 insns. This is not the conceptually best place to do this, but this is
4336 the easiest as we have convenient access to cumulative args info. */
4338 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4341 unsigned HOST_WIDE_INT regval = cum->words;
4344 for (i = 0; i < 8; i++)
4345 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4347 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4351 /* If all argument slots are used, then it must go on the stack. */
4352 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4355 /* Check for and handle homogeneous FP aggregates. */
4357 hfa_mode = hfa_element_mode (type, 0);
4359 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4360 and unprototyped hfas are passed specially. */
4361 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4365 int fp_regs = cum->fp_regs;
4366 int int_regs = cum->words + offset;
4367 int hfa_size = GET_MODE_SIZE (hfa_mode);
4371 /* If prototyped, pass it in FR regs then GR regs.
4372 If not prototyped, pass it in both FR and GR regs.
4374 If this is an SFmode aggregate, then it is possible to run out of
4375 FR regs while GR regs are still left. In that case, we pass the
4376 remaining part in the GR regs. */
4378 /* Fill the FP regs. We do this always. We stop if we reach the end
4379 of the argument, the last FP register, or the last argument slot. */
4381 byte_size = ((mode == BLKmode)
4382 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4383 args_byte_size = int_regs * UNITS_PER_WORD;
4385 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4386 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4388 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4389 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4393 args_byte_size += hfa_size;
4397 /* If no prototype, then the whole thing must go in GR regs. */
4398 if (! cum->prototype)
4400 /* If this is an SFmode aggregate, then we might have some left over
4401 that needs to go in GR regs. */
4402 else if (byte_size != offset)
4403 int_regs += offset / UNITS_PER_WORD;
4405 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4407 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4409 enum machine_mode gr_mode = DImode;
4410 unsigned int gr_size;
4412 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4413 then this goes in a GR reg left adjusted/little endian, right
4414 adjusted/big endian. */
4415 /* ??? Currently this is handled incorrectly, because 4-byte hunks are
4416 always right adjusted/little endian. */
4419 /* If we have an even 4 byte hunk because the aggregate is a
4420 multiple of 4 bytes in size, then this goes in a GR reg right
4421 adjusted/little endian. */
4422 else if (byte_size - offset == 4)
4425 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4426 gen_rtx_REG (gr_mode, (basereg
4430 gr_size = GET_MODE_SIZE (gr_mode);
4432 if (gr_size == UNITS_PER_WORD
4433 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4435 else if (gr_size > UNITS_PER_WORD)
4436 int_regs += gr_size / UNITS_PER_WORD;
4438 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
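/* Worked example (illustrative): struct { float f[3]; }, named and
   prototyped, is an SFmode HFA and lands one element per FP register in
   f8, f9, f10; had the FP registers run out mid-aggregate, the loop
   above would pass the tail in general registers as DImode chunks. */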
4441 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4442 else if (TARGET_ABI_OPEN_VMS && named == 0)
4444 if (FLOAT_MODE_P (mode))
4445 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4447 return gen_rtx_REG (mode, basereg + cum->words);
4450 /* Integral and aggregates go in general registers. If we have run out of
4451 FR registers, then FP values must also go in general registers. This can
4452 happen when we have a SFmode HFA. */
4453 else if (mode == TFmode || mode == TCmode
4454 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4456 int byte_size = ((mode == BLKmode)
4457 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4458 if (BYTES_BIG_ENDIAN
4459 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4460 && byte_size < UNITS_PER_WORD
4463 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4464 gen_rtx_REG (DImode,
4465 (basereg + cum->words
4468 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4471 return gen_rtx_REG (mode, basereg + cum->words + offset);
4475 /* If there is a prototype, then FP values go in a FR register when
4476 named, and in a GR register when unnamed. */
4477 else if (cum->prototype)
4480 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4481 /* In big-endian mode, an anonymous SFmode value must be represented
4482 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4483 the value into the high half of the general register. */
4484 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4485 return gen_rtx_PARALLEL (mode,
4487 gen_rtx_EXPR_LIST (VOIDmode,
4488 gen_rtx_REG (DImode, basereg + cum->words + offset),
4491 return gen_rtx_REG (mode, basereg + cum->words + offset);
4493 /* If there is no prototype, then FP values go in both FR and GR
4494 registers. */
4497 /* See comment above. */
4498 enum machine_mode inner_mode =
4499 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4501 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4502 gen_rtx_REG (mode, (FR_ARG_FIRST
4505 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4506 gen_rtx_REG (inner_mode,
4507 (basereg + cum->words
4511 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4515 /* Implement TARGET_FUNCTION_ARG target hook. */
4518 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4519 const_tree type, bool named)
4521 return ia64_function_arg_1 (cum, mode, type, named, false);
4524 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4527 ia64_function_incoming_arg (CUMULATIVE_ARGS *cum,
4528 enum machine_mode mode,
4529 const_tree type, bool named)
4531 return ia64_function_arg_1 (cum, mode, type, named, true);
4534 /* Return number of bytes, at the beginning of the argument, that must be
4535 put in registers. 0 if the argument is entirely in registers or entirely
4536 on the stack. */
4539 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4540 tree type, bool named ATTRIBUTE_UNUSED)
4542 int words = ia64_function_arg_words (type, mode);
4543 int offset = ia64_function_arg_offset (cum, type, words);
4545 /* If all argument slots are used, then it must go on the stack. */
4546 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4549 /* It doesn't matter whether the argument goes in FR or GR regs. If
4550 it fits within the 8 argument slots, then it goes entirely in
4551 registers. If it extends past the last argument slot, then the rest
4552 goes on the stack. */
4554 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4557 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
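/* Worked example (illustrative): with 6 slots already used and a 3-word
   argument, the argument straddles the slot limit, so
   (8 - 6) * UNITS_PER_WORD = 16 bytes go in registers and the remaining
   8 bytes go on the stack. */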
4560 /* Return ivms_arg_type based on machine_mode. */
4562 static enum ivms_arg_type
4563 ia64_arg_type (enum machine_mode mode)
4576 /* Update CUM to point after this argument. This is patterned after
4577 ia64_function_arg. */
4580 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4581 const_tree type, bool named)
4583 int words = ia64_function_arg_words (type, mode);
4584 int offset = ia64_function_arg_offset (cum, type, words);
4585 enum machine_mode hfa_mode = VOIDmode;
4587 /* If all arg slots are already full, then there is nothing to do. */
4588 if (cum->words >= MAX_ARGUMENT_SLOTS)
4590 cum->words += words + offset;
4594 cum->atypes[cum->words] = ia64_arg_type (mode);
4595 cum->words += words + offset;
4597 /* Check for and handle homogeneous FP aggregates. */
4599 hfa_mode = hfa_element_mode (type, 0);
4601 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4602 and unprototyped hfas are passed specially. */
4603 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4605 int fp_regs = cum->fp_regs;
4606 /* This is the original value of cum->words + offset. */
4607 int int_regs = cum->words - words;
4608 int hfa_size = GET_MODE_SIZE (hfa_mode);
4612 /* If prototyped, pass it in FR regs then GR regs.
4613 If not prototyped, pass it in both FR and GR regs.
4615 If this is an SFmode aggregate, then it is possible to run out of
4616 FR regs while GR regs are still left. In that case, we pass the
4617 remaining part in the GR regs. */
4619 /* Fill the FP regs. We do this always. We stop if we reach the end
4620 of the argument, the last FP register, or the last argument slot. */
4622 byte_size = ((mode == BLKmode)
4623 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4624 args_byte_size = int_regs * UNITS_PER_WORD;
4626 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4627 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4630 args_byte_size += hfa_size;
4634 cum->fp_regs = fp_regs;
4637 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4638 else if (TARGET_ABI_OPEN_VMS && named == 0)
4640 cum->int_regs = cum->words;
4641 cum->fp_regs = cum->words;
4644 /* Integral and aggregates go in general registers. So do TFmode FP values.
4645 If we have run out of FR registers, then other FP values must also go in
4646 general registers. This can happen when we have a SFmode HFA. */
4647 else if (mode == TFmode || mode == TCmode
4648 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4649 cum->int_regs = cum->words;
4651 /* If there is a prototype, then FP values go in a FR register when
4652 named, and in a GR register when unnamed. */
4653 else if (cum->prototype)
4656 cum->int_regs = cum->words;
4658 /* ??? Complex types should not reach here. */
4659 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4661 /* If there is no prototype, then FP values go in both FR and GR
4662 registers. */
4665 /* ??? Complex types should not reach here. */
4666 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4667 cum->int_regs = cum->words;
4671 /* Arguments with alignment larger than 8 bytes start at the next even
4672 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4673 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4676 ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4678 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4679 return PARM_BOUNDARY * 2;
4683 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4684 return PARM_BOUNDARY * 2;
4686 return PARM_BOUNDARY;
4689 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4690 return PARM_BOUNDARY * 2;
4692 return PARM_BOUNDARY;
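/* Example (illustrative): a 16-byte-aligned aggregate is given
   PARM_BOUNDARY * 2 = 128-bit alignment, while a plain int keeps the
   64-bit PARM_BOUNDARY. */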
4695 /* True if it is OK to do sibling call optimization for the specified
4696 call expression EXP. DECL will be the called function, or NULL if
4697 this is an indirect call. */
4699 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4701 /* We can't perform a sibcall if the current function has the syscall_linkage
4702 attribute. */
4703 if (lookup_attribute ("syscall_linkage",
4704 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4707 /* We must always return with our current GP. This means we can
4708 only sibcall to functions defined in the current module unless
4709 TARGET_CONST_GP is set to true. */
4710 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4714 /* Implement va_arg. */
4717 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4720 /* Variable sized types are passed by reference. */
4721 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4723 tree ptrtype = build_pointer_type (type);
4724 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4725 return build_va_arg_indirect_ref (addr);
4728 /* Aggregate arguments with alignment larger than 8 bytes start at
4729 the next even boundary. Integer and floating point arguments
4730 do so if they are larger than 8 bytes, whether or not they are
4731 also aligned larger than 8 bytes. */
4732 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4733 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4735 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4736 size_int (2 * UNITS_PER_WORD - 1));
4737 t = fold_convert (sizetype, t);
4738 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4739 size_int (-2 * UNITS_PER_WORD));
4740 t = fold_convert (TREE_TYPE (valist), t);
4741 gimplify_assign (unshare_expr (valist), t, pre_p);
4744 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
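/* Illustrative effect of the alignment fixup above (assumed): for a type
   with 16-byte alignment the generated gimple amounts to
	valist = (valist + 15) & -16;
   rounding the va_list pointer up to a 2 * UNITS_PER_WORD boundary before
   the standard va_arg expansion takes over. */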
4747 /* Return 1 if the function return value is returned in memory. Return 0
4748 if it is in a register. */
4751 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4753 enum machine_mode mode;
4754 enum machine_mode hfa_mode;
4755 HOST_WIDE_INT byte_size;
4757 mode = TYPE_MODE (valtype);
4758 byte_size = GET_MODE_SIZE (mode);
4759 if (mode == BLKmode)
4761 byte_size = int_size_in_bytes (valtype);
4766 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4768 hfa_mode = hfa_element_mode (valtype, 0);
4769 if (hfa_mode != VOIDmode)
4771 int hfa_size = GET_MODE_SIZE (hfa_mode);
4773 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4778 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4784 /* Return rtx for register that holds the function return value. */
4787 ia64_function_value (const_tree valtype,
4788 const_tree fn_decl_or_type,
4789 bool outgoing ATTRIBUTE_UNUSED)
4791 enum machine_mode mode;
4792 enum machine_mode hfa_mode;
4794 const_tree func = fn_decl_or_type;
4797 && !DECL_P (fn_decl_or_type))
4800 mode = TYPE_MODE (valtype);
4801 hfa_mode = hfa_element_mode (valtype, 0);
4803 if (hfa_mode != VOIDmode)
4811 hfa_size = GET_MODE_SIZE (hfa_mode);
4812 byte_size = ((mode == BLKmode)
4813 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4815 for (i = 0; offset < byte_size; i++)
4817 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4818 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4822 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4824 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4825 return gen_rtx_REG (mode, FR_ARG_FIRST);
4828 bool need_parallel = false;
4830 /* In big-endian mode, we need to manage the layout of aggregates
4831 in the registers so that we get the bits properly aligned in
4832 the highpart of the registers. */
4833 if (BYTES_BIG_ENDIAN
4834 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4835 need_parallel = true;
4837 /* Something like struct S { long double x; char a[0] } is not an
4838 HFA structure, and therefore doesn't go in fp registers. But
4839 the middle-end will give it XFmode anyway, and XFmode values
4840 don't normally fit in integer registers. So we need to smuggle
4841 the value inside a parallel. */
4842 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4843 need_parallel = true;
4853 bytesize = int_size_in_bytes (valtype);
4854 /* An empty PARALLEL is invalid here, but the return value
4855 doesn't matter for empty structs. */
4857 return gen_rtx_REG (mode, GR_RET_FIRST);
4858 for (i = 0; offset < bytesize; i++)
4860 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4861 gen_rtx_REG (DImode,
4864 offset += UNITS_PER_WORD;
4866 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4869 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4870 func ? TREE_TYPE (func) : NULL_TREE,
4873 return gen_rtx_REG (mode, GR_RET_FIRST);
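/* Examples (illustrative): struct { double a, b; } is an HFA returned in
   f8 and f9; struct { long x, y, z; } is returned as a PARALLEL spanning
   r8..r10 starting at GR_RET_FIRST; a plain int comes back in r8 after
   promotion. */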
4877 /* Worker function for TARGET_LIBCALL_VALUE. */
4880 ia64_libcall_value (enum machine_mode mode,
4881 const_rtx fun ATTRIBUTE_UNUSED)
4883 return gen_rtx_REG (mode,
4884 (((GET_MODE_CLASS (mode) == MODE_FLOAT
4885 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4886 && (mode) != TFmode)
4887 ? FR_RET_FIRST : GR_RET_FIRST));
4890 /* Worker function for FUNCTION_VALUE_REGNO_P. */
4893 ia64_function_value_regno_p (const unsigned int regno)
4895 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
4896 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
4899 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4900 We need to emit DTP-relative relocations. */
4903 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4905 gcc_assert (size == 4 || size == 8);
4907 fputs ("\tdata4.ua\t@dtprel(", file);
4909 fputs ("\tdata8.ua\t@dtprel(", file);
4910 output_addr_const (file, x);
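/* Illustrative output (assumed): for size == 8 and a symbol tls_var this
   emits
	data8.ua @dtprel(tls_var)
   which the linker resolves to the DTP-relative offset of tls_var. */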
4914 /* Print a memory address as an operand to reference that memory location. */
4916 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4917 also call this from ia64_print_operand for memory addresses. */
4920 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4921 rtx address ATTRIBUTE_UNUSED)
4925 /* Print an operand to an assembler instruction.
4926 C Swap and print a comparison operator.
4927 D Print an FP comparison operator.
4928 E Print 32 - constant, for SImode shifts as extract.
4929 e Print 64 - constant, for DImode rotates.
4930 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4931 a floating point register emitted normally.
4932 G A floating point constant.
4933 I Invert a predicate register by adding 1.
4934 J Select the proper predicate register for a condition.
4935 j Select the inverse predicate register for a condition.
4936 O Append .acq for volatile load.
4937 P Postincrement of a MEM.
4938 Q Append .rel for volatile store.
4939 R Print .s, .d, or nothing for a single, double, or no truncation.
4940 S Shift amount for shladd instruction.
4941 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4942 for Intel assembler.
4943 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4944 for Intel assembler.
4945 X A pair of floating point registers.
4946 r Print register name, or constant 0 as r0. HP compatibility for
4947 Linux kernel.
4948 v Print vector constant value as an 8-byte integer value. */
4951 ia64_print_operand (FILE * file, rtx x, int code)
4958 /* Handled below. */
4963 enum rtx_code c = swap_condition (GET_CODE (x));
4964 fputs (GET_RTX_NAME (c), file);
4969 switch (GET_CODE (x))
4993 str = GET_RTX_NAME (GET_CODE (x));
5000 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5008 if (x == CONST0_RTX (GET_MODE (x)))
5009 str = reg_names [FR_REG (0)];
5010 else if (x == CONST1_RTX (GET_MODE (x)))
5011 str = reg_names [FR_REG (1)];
5014 gcc_assert (GET_CODE (x) == REG);
5015 str = reg_names [REGNO (x)];
5024 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5025 real_to_target (val, &rv, GET_MODE (x));
5026 if (GET_MODE (x) == SFmode)
5027 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5028 else if (GET_MODE (x) == DFmode)
5029 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5031 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5034 output_operand_lossage ("invalid %%G mode");
5039 fputs (reg_names [REGNO (x) + 1], file);
5045 unsigned int regno = REGNO (XEXP (x, 0));
5046 if (GET_CODE (x) == EQ)
5050 fputs (reg_names [regno], file);
5055 if (MEM_VOLATILE_P (x))
5056 fputs(".acq", file);
5061 HOST_WIDE_INT value;
5063 switch (GET_CODE (XEXP (x, 0)))
5069 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5070 if (GET_CODE (x) == CONST_INT)
5074 gcc_assert (GET_CODE (x) == REG);
5075 fprintf (file, ", %s", reg_names[REGNO (x)]);
5081 value = GET_MODE_SIZE (GET_MODE (x));
5085 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5089 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5094 if (MEM_VOLATILE_P (x))
5095 fputs(".rel", file);
5099 if (x == CONST0_RTX (GET_MODE (x)))
5101 else if (x == CONST1_RTX (GET_MODE (x)))
5103 else if (x == CONST2_RTX (GET_MODE (x)))
5106 output_operand_lossage ("invalid %%R value");
5110 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5114 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5116 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5122 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5124 const char *prefix = "0x";
5125 if (INTVAL (x) & 0x80000000)
5127 fprintf (file, "0xffffffff");
5130 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5137 unsigned int regno = REGNO (x);
5138 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5143 /* If this operand is the constant zero, write it as register zero.
5144 Any register, zero, or CONST_INT value is OK here. */
5145 if (GET_CODE (x) == REG)
5146 fputs (reg_names[REGNO (x)], file);
5147 else if (x == CONST0_RTX (GET_MODE (x)))
5149 else if (GET_CODE (x) == CONST_INT)
5150 output_addr_const (file, x);
5152 output_operand_lossage ("invalid %%r value");
5156 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5157 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5164 /* For conditional branches, returns or calls, substitute
5165 sptk, dptk, dpnt, or spnt for %s. */
5166 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5169 int pred_val = INTVAL (XEXP (x, 0));
5171 /* Guess top and bottom 10% statically predicted. */
5172 if (pred_val < REG_BR_PROB_BASE / 50
5173 && br_prob_note_reliable_p (x))
5175 else if (pred_val < REG_BR_PROB_BASE / 2)
5177 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5178 || !br_prob_note_reliable_p (x))
5183 else if (GET_CODE (current_output_insn) == CALL_INSN)
5188 fputs (which, file);
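/* Illustration (assumed): a reliably-predicted branch whose REG_BR_PROB
   note says it is taken less than 2% of the time prints the ".spnt"
   (static predict not taken) completer, e.g. "(p6) br.cond.spnt .L5". */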
5193 x = current_insn_predicate;
5196 unsigned int regno = REGNO (XEXP (x, 0));
5197 if (GET_CODE (x) == EQ)
5199 fprintf (file, "(%s) ", reg_names [regno]);
5204 output_operand_lossage ("ia64_print_operand: unknown code");
5208 switch (GET_CODE (x))
5210 /* This happens for the spill/restore instructions. */
5215 /* ... fall through ... */
5218 fputs (reg_names [REGNO (x)], file);
5223 rtx addr = XEXP (x, 0);
5224 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5225 addr = XEXP (addr, 0);
5226 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5231 output_addr_const (file, x);
5238 /* Compute a (partial) cost for rtx X. Return true if the complete
5239 cost has been computed, and false if subexpressions should be
5240 scanned. In either case, *TOTAL contains the cost result. */
5241 /* ??? This is incomplete. */
5244 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5245 bool speed ATTRIBUTE_UNUSED)
5253 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5256 if (satisfies_constraint_I (x))
5258 else if (satisfies_constraint_J (x))
5261 *total = COSTS_N_INSNS (1);
5264 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5267 *total = COSTS_N_INSNS (1);
5272 *total = COSTS_N_INSNS (1);
5278 *total = COSTS_N_INSNS (3);
5282 *total = COSTS_N_INSNS (4);
5286 /* For multiplies wider than HImode, we have to go to the FPU,
5287 which normally involves copies. Plus there's the latency
5288 of the multiply itself, and the latency of the instructions to
5289 transfer integer regs to FP regs. */
5290 if (FLOAT_MODE_P (GET_MODE (x)))
5291 *total = COSTS_N_INSNS (4);
5292 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5293 *total = COSTS_N_INSNS (10);
5295 *total = COSTS_N_INSNS (2);
5300 if (FLOAT_MODE_P (GET_MODE (x)))
5302 *total = COSTS_N_INSNS (4);
5310 *total = COSTS_N_INSNS (1);
5317 /* We make divide expensive, so that divide-by-constant will be
5318 optimized to a multiply. */
5319 *total = COSTS_N_INSNS (60);
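/* Illustration: since ia64 has no integer divide instruction, costing
   divide at 60 insns pushes division by a constant, e.g. x / 10, into
   the cheaper multiply-by-reciprocal-and-shift expansion. */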
5327 /* Calculate the cost of moving data from a register in class FROM to
5328 one in class TO, using MODE. */
5331 ia64_register_move_cost (enum machine_mode mode, reg_class_t from_i,
5334 enum reg_class from = (enum reg_class) from_i;
5335 enum reg_class to = (enum reg_class) to_i;
5337 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5338 if (to == ADDL_REGS)
5340 if (from == ADDL_REGS)
5343 /* All costs are symmetric, so reduce cases by putting the
5344 lower number class as the destination. */
5347 enum reg_class tmp = to;
5348 to = from, from = tmp;
5351 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5352 so that we get secondary memory reloads. Between FR_REGS,
5353 we have to make this at least as expensive as memory_move_cost
5354 to avoid spectacularly poor register class preferencing. */
5355 if (mode == XFmode || mode == RFmode)
5357 if (to != GR_REGS || from != GR_REGS)
5358 return memory_move_cost (mode, to, false);
5366 /* Moving between PR registers takes two insns. */
5367 if (from == PR_REGS)
5369 /* Moving between PR and anything but GR is impossible. */
5370 if (from != GR_REGS)
5371 return memory_move_cost (mode, to, false);
5375 /* Moving between BR and anything but GR is impossible. */
5376 if (from != GR_REGS && from != GR_AND_BR_REGS)
5377 return memory_move_cost (mode, to, false);
5382 /* Moving between AR and anything but GR is impossible. */
5383 if (from != GR_REGS)
5384 return memory_move_cost (mode, to, false);
5390 case GR_AND_FR_REGS:
5391 case GR_AND_BR_REGS:
5402 /* Calculate the cost of moving data of MODE from a register to or from
5406 ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5408 bool in ATTRIBUTE_UNUSED)
5410 if (rclass == GENERAL_REGS
5411 || rclass == FR_REGS
5412 || rclass == FP_REGS
5413 || rclass == GR_AND_FR_REGS)
5419 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5420 on RCLASS to use when copying X into that class. */
5423 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5429 /* Don't allow volatile mem reloads into floating point registers.
5430 This is defined to force reload to choose the r/m case instead
5431 of the f/f case when reloading (set (reg fX) (mem/v)). */
5432 if (MEM_P (x) && MEM_VOLATILE_P (x))
5435 /* Force all unrecognized constants into the constant pool. */
5453 /* This function returns the register class required for a secondary
5454 register when copying between one of the registers in RCLASS, and X,
5455 using MODE. A return value of NO_REGS means that no secondary register
5456 is required.
5459 ia64_secondary_reload_class (enum reg_class rclass,
5460 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5464 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5465 regno = true_regnum (x);
5472 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5473 interaction. We end up with two pseudos with overlapping lifetimes
5474 both of which are equiv to the same constant, and both of which need
5475 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5476 changes depending on the path length, which means the qty_first_reg
5477 check in make_regs_eqv can give different answers at different times.
5478 At some point I'll probably need a reload_indi pattern to handle
5479 this.
5481 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5482 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5483 non-general registers for good measure. */
5484 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5487 /* This is needed if a pseudo used as a call_operand gets spilled to a
5488 stack slot. */
5489 if (GET_CODE (x) == MEM)
5495 /* Need to go through general registers to get to other class regs. */
5496 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5499 /* This can happen when a paradoxical subreg is an operand to the
5500 muldi3 pattern. */
5501 /* ??? This shouldn't be necessary after instruction scheduling is
5502 enabled, because paradoxical subregs are not accepted by
5503 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5504 stop the paradoxical subreg stupidity in the *_operand functions
5505 in recog.c. */
5506 if (GET_CODE (x) == MEM
5507 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5508 || GET_MODE (x) == QImode))
5511 /* This can happen because of the ior/and/etc patterns that accept FP
5512 registers as operands. If the third operand is a constant, then it
5513 needs to be reloaded into a FP register. */
5514 if (GET_CODE (x) == CONST_INT)
5517 /* This can happen because of register elimination in a muldi3 insn.
5518 E.g. `26107 * (unsigned long)&u'. */
5519 if (GET_CODE (x) == PLUS)
5524 /* ??? This happens if we cse/gcse a BImode value across a call,
5525 and the function has a nonlocal goto. This is because global
5526 does not allocate call crossing pseudos to hard registers when
5527 crtl->has_nonlocal_goto is true. This is relatively
5528 common for C++ programs that use exceptions. To reproduce,
5529 return NO_REGS and compile libstdc++. */
5530 if (GET_CODE (x) == MEM)
5533 /* This can happen when we take a BImode subreg of a DImode value,
5534 and that DImode value winds up in some non-GR register. */
5535 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5547 /* Implement targetm.unspec_may_trap_p hook. */
5549 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5551 if (GET_CODE (x) == UNSPEC)
5553 switch (XINT (x, 1))
5559 case UNSPEC_CHKACLR:
5561 /* These unspecs are just wrappers. */
5562 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5566 return default_unspec_may_trap_p (x, flags);
5570 /* Parse the -mfixed-range= option string. */
5573 fix_range (const char *const_str)
5576 char *str, *dash, *comma;
5578 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5579 REG2 are either register names or register numbers. The effect
5580 of this option is to mark the registers in the range from REG1 to
5581 REG2 as ``fixed'' so they won't be used by the compiler. This is
5582 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5584 i = strlen (const_str);
5585 str = (char *) alloca (i + 1);
5586 memcpy (str, const_str, i + 1);
5590 dash = strchr (str, '-');
5593 warning (0, "value of -mfixed-range must have form REG1-REG2");
5598 comma = strchr (dash + 1, ',');
5602 first = decode_reg_name (str);
5605 warning (0, "unknown register name: %s", str);
5609 last = decode_reg_name (dash + 1);
5612 warning (0, "unknown register name: %s", dash + 1);
5620 warning (0, "%s-%s is an empty range", str, dash + 1);
5624 for (i = first; i <= last; ++i)
5625 fixed_regs[i] = call_used_regs[i] = 1;
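/* Usage example: -mfixed-range=f32-f127 marks the upper half of the FP
   register file fixed, which is how kernel code keeps itself out of
   f32-f127, as noted above. */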
5635 /* Implement TARGET_HANDLE_OPTION. */
5638 ia64_handle_option (size_t code, const char *arg, int value)
5642 case OPT_mfixed_range_:
5646 case OPT_mtls_size_:
5647 if (value != 14 && value != 22 && value != 64)
5648 error ("bad value %<%s%> for -mtls-size= switch", arg);
5655 const char *name; /* processor name or nickname. */
5656 enum processor_type processor;
5658 const processor_alias_table[] =
5660 {"itanium2", PROCESSOR_ITANIUM2},
5661 {"mckinley", PROCESSOR_ITANIUM2},
5663 int const pta_size = ARRAY_SIZE (processor_alias_table);
5666 for (i = 0; i < pta_size; i++)
5667 if (!strcmp (arg, processor_alias_table[i].name))
5669 ia64_tune = processor_alias_table[i].processor;
5673 error ("bad value %<%s%> for -mtune= switch", arg);
5682 /* Implement TARGET_OPTION_OVERRIDE. */
5685 ia64_option_override (void)
5687 if (TARGET_AUTO_PIC)
5688 target_flags |= MASK_CONST_GP;
5690 /* Numerous experiments show that IRA-based loop pressure
5691 calculation works better for RTL loop invariant motion on targets
5692 with enough (>= 32) registers. It is an expensive optimization.
5693 So it is on only for peak performance. */
5695 flag_ira_loop_pressure = 1;
5698 ia64_section_threshold = (global_options_set.x_g_switch_value
5700 : IA64_DEFAULT_GVALUE);
5702 init_machine_status = ia64_init_machine_status;
5704 if (align_functions <= 0)
5705 align_functions = 64;
5706 if (align_loops <= 0)
5708 if (TARGET_ABI_OPEN_VMS)
5711 ia64_override_options_after_change();
5714 /* Implement targetm.override_options_after_change. */
5717 ia64_override_options_after_change (void)
5719 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5720 flag_schedule_insns_after_reload = 0;
5723 && !global_options_set.x_flag_selective_scheduling
5724 && !global_options_set.x_flag_selective_scheduling2)
5726 flag_selective_scheduling2 = 1;
5727 flag_sel_sched_pipelining = 1;
5729 if (mflag_sched_control_spec == 2)
5731 /* Control speculation is on by default for the selective scheduler,
5732 but not for the Haifa scheduler. */
5733 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5735 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5737 /* FIXME: remove this when we implement breaking autoinc insns
5738 apart as a separate transformation. */
5739 flag_auto_inc_dec = 0;
5743 /* Initialize the record of emitted frame related registers. */
5745 void ia64_init_expanders (void)
5747 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5750 static struct machine_function *
5751 ia64_init_machine_status (void)
5753 return ggc_alloc_cleared_machine_function ();
5756 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5757 static enum attr_type ia64_safe_type (rtx);
5759 static enum attr_itanium_class
5760 ia64_safe_itanium_class (rtx insn)
5762 if (recog_memoized (insn) >= 0)
5763 return get_attr_itanium_class (insn);
5764 else if (DEBUG_INSN_P (insn))
5765 return ITANIUM_CLASS_IGNORE;
5767 return ITANIUM_CLASS_UNKNOWN;
5770 static enum attr_type
5771 ia64_safe_type (rtx insn)
5773 if (recog_memoized (insn) >= 0)
5774 return get_attr_type (insn);
5776 return TYPE_UNKNOWN;
5779 /* The following collection of routines emit instruction group stop bits as
5780 necessary to avoid dependencies. */
5782 /* Need to track some additional registers as far as serialization is
5783 concerned so we can properly handle br.call and br.ret. We could
5784 make these registers visible to gcc, but since these registers are
5785 never explicitly used in gcc generated code, it seems wasteful to
5786 do so (plus it would make the call and return patterns needlessly
5787 extra complicated). */
5788 #define REG_RP (BR_REG (0))
5789 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5790 /* This is used for volatile asms which may require a stop bit immediately
5791 before and after them. */
5792 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5793 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5794 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5796 /* For each register, we keep track of how it has been written in the
5797 current instruction group.
5799 If a register is written unconditionally (no qualifying predicate),
5800 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5802 If a register is written if its qualifying predicate P is true, we
5803 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5804 may be written again by the complement of P (P^1) and when this happens,
5805 WRITE_COUNT gets set to 2.
5807 The result of this is that whenever an insn attempts to write a register
5808 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5810 If a predicate register is written by a floating-point insn, we set
5811 WRITTEN_BY_FP to true.
5813 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5814 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
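/* Worked example (illustrative): after "(p6) mov r8 = 1", r8 has
   WRITE_COUNT 1 and FIRST_PRED p6; a subsequent "(p7) mov r8 = 2" raises
   WRITE_COUNT to 2, so any further write to r8 in the same group forces
   a stop bit first. */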
5816 #if GCC_VERSION >= 4000
5817 #define RWS_FIELD_TYPE __extension__ unsigned short
5819 #define RWS_FIELD_TYPE unsigned int
5821 struct reg_write_state
5823 RWS_FIELD_TYPE write_count : 2;
5824 RWS_FIELD_TYPE first_pred : 10;
5825 RWS_FIELD_TYPE written_by_fp : 1;
5826 RWS_FIELD_TYPE written_by_and : 1;
5827 RWS_FIELD_TYPE written_by_or : 1;
5830 /* Cumulative info for the current instruction group. */
5831 struct reg_write_state rws_sum[NUM_REGS];
5832 #ifdef ENABLE_CHECKING
5833 /* Bitmap whether a register has been written in the current insn. */
5834 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5835 / HOST_BITS_PER_WIDEST_FAST_INT];
5838 rws_insn_set (int regno)
5840 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5841 SET_HARD_REG_BIT (rws_insn, regno);
5845 rws_insn_test (int regno)
5847 return TEST_HARD_REG_BIT (rws_insn, regno);
5850 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5851 unsigned char rws_insn[2];
5854 rws_insn_set (int regno)
5856 if (regno == REG_AR_CFM)
5858 else if (regno == REG_VOLATILE)
5863 rws_insn_test (int regno)
5865 if (regno == REG_AR_CFM)
5867 if (regno == REG_VOLATILE)
5873 /* Indicates whether this is the first instruction after a stop bit,
5874 in which case we don't need another stop bit. Without this,
5875 ia64_variable_issue will die when scheduling an alloc. */
5876 static int first_instruction;
5878 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5879 RTL for one instruction. */
5882 unsigned int is_write : 1; /* Is register being written? */
5883 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5884 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5885 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5886 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5887 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5890 static void rws_update (int, struct reg_flags, int);
5891 static int rws_access_regno (int, struct reg_flags, int);
5892 static int rws_access_reg (rtx, struct reg_flags, int);
5893 static void update_set_flags (rtx, struct reg_flags *);
5894 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5895 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5896 static void init_insn_group_barriers (void);
5897 static int group_barrier_needed (rtx);
5898 static int safe_group_barrier_needed (rtx);
5899 static int in_safe_group_barrier;
5901 /* Update *RWS for REGNO, which is being written by the current instruction,
5902 with predicate PRED, and associated register flags in FLAGS. */
5905 rws_update (int regno, struct reg_flags flags, int pred)
5908 rws_sum[regno].write_count++;
5910 rws_sum[regno].write_count = 2;
5911 rws_sum[regno].written_by_fp |= flags.is_fp;
5912 /* ??? Not tracking and/or across differing predicates. */
5913 rws_sum[regno].written_by_and = flags.is_and;
5914 rws_sum[regno].written_by_or = flags.is_or;
5915 rws_sum[regno].first_pred = pred;
5918 /* Handle an access to register REGNO of type FLAGS using predicate register
5919 PRED. Update rws_sum array. Return 1 if this access creates
5920 a dependency with an earlier instruction in the same group. */
5923 rws_access_regno (int regno, struct reg_flags flags, int pred)
5925 int need_barrier = 0;
5927 gcc_assert (regno < NUM_REGS);
5929 if (! PR_REGNO_P (regno))
5930 flags.is_and = flags.is_or = 0;
5936 rws_insn_set (regno);
5937 write_count = rws_sum[regno].write_count;
5939 switch (write_count)
5942 /* The register has not been written yet. */
5943 if (!in_safe_group_barrier)
5944 rws_update (regno, flags, pred);
5948 /* The register has been written via a predicate. Treat
5949 it like an unconditional write and do not try to check
5950 for a complementary pred reg in an earlier write.
5951 if (flags.is_and && rws_sum[regno].written_by_and)
5953 else if (flags.is_or && rws_sum[regno].written_by_or)
5957 if (!in_safe_group_barrier)
5958 rws_update (regno, flags, pred);
5962 /* The register has been unconditionally written already. We
5963 need a barrier. */
5964 if (flags.is_and && rws_sum[regno].written_by_and)
5966 else if (flags.is_or && rws_sum[regno].written_by_or)
5970 if (!in_safe_group_barrier)
5972 rws_sum[regno].written_by_and = flags.is_and;
5973 rws_sum[regno].written_by_or = flags.is_or;
5983 if (flags.is_branch)
5985 /* Branches have several RAW exceptions that allow us to avoid
5986 barriers. */
5988 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5989 /* RAW dependencies on branch regs are permissible as long
5990 as the writer is a non-branch instruction. Since we
5991 never generate code that uses a branch register written
5992 by a branch instruction, handling this case is
5996 if (REGNO_REG_CLASS (regno) == PR_REGS
5997 && ! rws_sum[regno].written_by_fp)
5998 /* The predicates of a branch are available within the
5999 same insn group as long as the predicate was written by
6000 something other than a floating-point instruction. */
6004 if (flags.is_and && rws_sum[regno].written_by_and)
6006 if (flags.is_or && rws_sum[regno].written_by_or)
6009 switch (rws_sum[regno].write_count)
6012 /* The register has not been written yet. */
6016 /* The register has been written via a predicate, assume we
6017 need a barrier (don't check for complementary regs). */
6022 /* The register has been unconditionally written already. We
6023 need a barrier. */
6032 return need_barrier;
6036 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6038 int regno = REGNO (reg);
6039 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6042 return rws_access_regno (regno, flags, pred);
6045 int need_barrier = 0;
6047 need_barrier |= rws_access_regno (regno + n, flags, pred);
6048 return need_barrier;
6052 /* Examine X, which is a SET rtx, and update the register flags
6053 stored in *PFLAGS. */
6056 update_set_flags (rtx x, struct reg_flags *pflags)
6058 rtx src = SET_SRC (x);
6060 switch (GET_CODE (src))
6066 /* There are four cases here:
6067 (1) The destination is (pc), in which case this is a branch,
6068 nothing here applies.
6069 (2) The destination is ar.lc, in which case this is a
6070 doloop_end_internal,
6071 (3) The destination is an fp register, in which case this is
6072 an fselect instruction.
6073 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6074 this is a check load.
6075 In all cases, nothing we do in this function applies. */
6079 if (COMPARISON_P (src)
6080 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6081 /* Set pflags->is_fp to 1 so that we know we're dealing
6082 with a floating point comparison when processing the
6083 destination of the SET. */
6086 /* Discover if this is a parallel comparison. We only handle
6087 and.orcm and or.andcm at present, since we must retain a
6088 strict inverse on the predicate pair. */
6089 else if (GET_CODE (src) == AND)
6091 else if (GET_CODE (src) == IOR)
6098 /* Subroutine of rtx_needs_barrier; this function determines whether the
6099 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6100 are as in rtx_needs_barrier. */
6104 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6106 int need_barrier = 0;
6108 rtx src = SET_SRC (x);
6110 if (GET_CODE (src) == CALL)
6111 /* We don't need to worry about the result registers that
6112 get written by subroutine call. */
6113 return rtx_needs_barrier (src, flags, pred);
6114 else if (SET_DEST (x) == pc_rtx)
6116 /* X is a conditional branch. */
6117 /* ??? This seems redundant, as the caller sets this bit for
6118 all JUMP_INSNs. */
6119 if (!ia64_spec_check_src_p (src))
6120 flags.is_branch = 1;
6121 return rtx_needs_barrier (src, flags, pred);
6124 if (ia64_spec_check_src_p (src))
6125 /* Avoid checking one register twice (in condition
6126 and in 'then' section) for ldc pattern. */
6128 gcc_assert (REG_P (XEXP (src, 2)));
6129 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6131 /* We process MEM below. */
6132 src = XEXP (src, 1);
6135 need_barrier |= rtx_needs_barrier (src, flags, pred);
6138 if (GET_CODE (dst) == ZERO_EXTRACT)
6140 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6141 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6143 return need_barrier;
6146 /* Handle an access to rtx X of type FLAGS using predicate register
6147 PRED. Return 1 if this access creates a dependency with an earlier
6148 instruction in the same group. */
6151 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6154 int is_complemented = 0;
6155 int need_barrier = 0;
6156 const char *format_ptr;
6157 struct reg_flags new_flags;
6165 switch (GET_CODE (x))
6168 update_set_flags (x, &new_flags);
6169 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6170 if (GET_CODE (SET_SRC (x)) != CALL)
6172 new_flags.is_write = 1;
6173 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6178 new_flags.is_write = 0;
6179 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6181 /* Avoid multiple register writes, in case this is a pattern with
6182 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6183 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6185 new_flags.is_write = 1;
6186 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6187 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6188 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6193 /* X is a predicated instruction. */
6195 cond = COND_EXEC_TEST (x);
6197 need_barrier = rtx_needs_barrier (cond, flags, 0);
6199 if (GET_CODE (cond) == EQ)
6200 is_complemented = 1;
6201 cond = XEXP (cond, 0);
6202 gcc_assert (GET_CODE (cond) == REG
6203 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6204 pred = REGNO (cond);
6205 if (is_complemented)
6208 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6209 return need_barrier;
6213 /* Clobber & use are for earlier compiler-phases only. */
6218 /* We always emit stop bits for traditional asms. We emit stop bits
6219 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6220 if (GET_CODE (x) != ASM_OPERANDS
6221 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6223 /* Avoid writing the register multiple times if we have multiple
6224 asm outputs. This avoids a failure in rws_access_reg. */
6225 if (! rws_insn_test (REG_VOLATILE))
6227 new_flags.is_write = 1;
6228 rws_access_regno (REG_VOLATILE, new_flags, pred);
6233 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6234 We cannot just fall through here since then we would be confused
6235 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6236 a traditional asm, unlike its normal usage. */
6238 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6239 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6244 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6246 rtx pat = XVECEXP (x, 0, i);
6247 switch (GET_CODE (pat))
6250 update_set_flags (pat, &new_flags);
6251 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6257 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6268 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6270 rtx pat = XVECEXP (x, 0, i);
6271 if (GET_CODE (pat) == SET)
6273 if (GET_CODE (SET_SRC (pat)) != CALL)
6275 new_flags.is_write = 1;
6276 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6280 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6281 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6286 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6289 if (REGNO (x) == AR_UNAT_REGNUM)
6291 for (i = 0; i < 64; ++i)
6292 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6295 need_barrier = rws_access_reg (x, flags, pred);
6299 /* Find the regs used in memory address computation. */
6300 new_flags.is_write = 0;
6301 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6304 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6305 case SYMBOL_REF: case LABEL_REF: case CONST:
6308 /* Operators with side-effects. */
6309 case POST_INC: case POST_DEC:
6310 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6312 new_flags.is_write = 0;
6313 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6314 new_flags.is_write = 1;
6315 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6319 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6321 new_flags.is_write = 0;
6322 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6323 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6324 new_flags.is_write = 1;
6325 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6328 /* Handle common unary and binary ops for efficiency. */
6329 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6330 case MOD: case UDIV: case UMOD: case AND: case IOR:
6331 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6332 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6333 case NE: case EQ: case GE: case GT: case LE:
6334 case LT: case GEU: case GTU: case LEU: case LTU:
6335 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6336 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6339 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6340 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6341 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6342 case SQRT: case FFS: case POPCOUNT:
6343 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6347 /* VEC_SELECT's second argument is a PARALLEL with integers that
6348 describe the elements selected. On ia64, those integers are
6349 always constants. Avoid walking the PARALLEL so that we don't
6350 get confused with "normal" parallels and then die. */
6351 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6355 switch (XINT (x, 1))
6357 case UNSPEC_LTOFF_DTPMOD:
6358 case UNSPEC_LTOFF_DTPREL:
6360 case UNSPEC_LTOFF_TPREL:
6362 case UNSPEC_PRED_REL_MUTEX:
6363 case UNSPEC_PIC_CALL:
6365 case UNSPEC_FETCHADD_ACQ:
6366 case UNSPEC_BSP_VALUE:
6367 case UNSPEC_FLUSHRS:
6368 case UNSPEC_BUNDLE_SELECTOR:
6371 case UNSPEC_GR_SPILL:
6372 case UNSPEC_GR_RESTORE:
6374 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6375 HOST_WIDE_INT bit = (offset >> 3) & 63;
6377 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6378 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6379 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6384 case UNSPEC_FR_SPILL:
6385 case UNSPEC_FR_RESTORE:
6386 case UNSPEC_GETF_EXP:
6387 case UNSPEC_SETF_EXP:
6389 case UNSPEC_FR_SQRT_RECIP_APPROX:
6390 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6395 case UNSPEC_CHKACLR:
6397 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6400 case UNSPEC_FR_RECIP_APPROX:
6402 case UNSPEC_COPYSIGN:
6403 case UNSPEC_FR_RECIP_APPROX_RES:
6404 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6405 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6408 case UNSPEC_CMPXCHG_ACQ:
6409 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6410 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6418 case UNSPEC_VOLATILE:
6419 switch (XINT (x, 1))
6422 /* Alloc must always be the first instruction of a group.
6423 We force this by always returning true. */
6424 /* ??? We might get better scheduling if we explicitly check for
6425 input/local/output register dependencies, and modify the
6426 scheduler so that alloc is always reordered to the start of
6427 the current group. We could then eliminate all of the
6428 first_instruction code. */
6429 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6431 new_flags.is_write = 1;
6432 rws_access_regno (REG_AR_CFM, new_flags, pred);
6435 case UNSPECV_SET_BSP:
6439 case UNSPECV_BLOCKAGE:
6440 case UNSPECV_INSN_GROUP_BARRIER:
6442 case UNSPECV_PSAC_ALL:
6443 case UNSPECV_PSAC_NORMAL:
6452 new_flags.is_write = 0;
6453 need_barrier = rws_access_regno (REG_RP, flags, pred);
6454 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6456 new_flags.is_write = 1;
6457 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6458 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6462 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6463 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6464 switch (format_ptr[i])
6466 case '0': /* unused field */
6467 case 'i': /* integer */
6468 case 'n': /* note */
6469 case 'w': /* wide integer */
6470 case 's': /* pointer to string */
6471 case 'S': /* optional pointer to string */
6475 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6480 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6481 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6490 return need_barrier;
6493 /* Clear out the state for group_barrier_needed at the start of a
6494 sequence of insns. */
6497 init_insn_group_barriers (void)
6499 memset (rws_sum, 0, sizeof (rws_sum));
6500 first_instruction = 1;
6503 /* Given the current state, determine whether a group barrier (a stop bit) is
6504 necessary before INSN. Return nonzero if so. This modifies the state to
6505 include the effects of INSN as a side-effect. */
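/* (For orientation, not from the original sources: in the emitted
   assembly a stop bit shows up as the ";;" that closes an instruction
   group, e.g. "add r14 = r15, r16 ;;", so a later insn may read r14
   without violating the group's RAW/WAW restrictions.)  */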
6508 group_barrier_needed (rtx insn)
6511 int need_barrier = 0;
6512 struct reg_flags flags;
6514 memset (&flags, 0, sizeof (flags));
6515 switch (GET_CODE (insn))
6522 /* A barrier doesn't imply an instruction group boundary. */
6526 memset (rws_insn, 0, sizeof (rws_insn));
6530 flags.is_branch = 1;
6531 flags.is_sibcall = SIBLING_CALL_P (insn);
6532 memset (rws_insn, 0, sizeof (rws_insn));
6534 /* Don't bundle a call following another call. */
6535 if ((pat = prev_active_insn (insn))
6536 && GET_CODE (pat) == CALL_INSN)
6542 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6546 if (!ia64_spec_check_p (insn))
6547 flags.is_branch = 1;
6549 /* Don't bundle a jump following a call. */
6550 if ((pat = prev_active_insn (insn))
6551 && GET_CODE (pat) == CALL_INSN)
6559 if (GET_CODE (PATTERN (insn)) == USE
6560 || GET_CODE (PATTERN (insn)) == CLOBBER)
6561 /* Don't care about USE and CLOBBER "insns"---those are used to
6562 indicate to the optimizer that it shouldn't get rid of
6563 certain operations. */
6566 pat = PATTERN (insn);
6568 /* Ug. Hack hacks hacked elsewhere. */
6569 switch (recog_memoized (insn))
6571 /* We play dependency tricks with the epilogue in order
6572 to get proper schedules. Undo this for dv analysis. */
6573 case CODE_FOR_epilogue_deallocate_stack:
6574 case CODE_FOR_prologue_allocate_stack:
6575 pat = XVECEXP (pat, 0, 0);
6578 /* The pattern we use for br.cloop confuses the code above.
6579 The second element of the vector is representative. */
6580 case CODE_FOR_doloop_end_internal:
6581 pat = XVECEXP (pat, 0, 1);
6584 /* Doesn't generate code. */
6585 case CODE_FOR_pred_rel_mutex:
6586 case CODE_FOR_prologue_use:
6593 memset (rws_insn, 0, sizeof (rws_insn));
6594 need_barrier = rtx_needs_barrier (pat, flags, 0);
/* Check to see if the previous instruction was a volatile asm.  */
6599 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6607 if (first_instruction && INSN_P (insn)
6608 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6609 && GET_CODE (PATTERN (insn)) != USE
6610 && GET_CODE (PATTERN (insn)) != CLOBBER)
6613 first_instruction = 0;
6616 return need_barrier;
6619 /* Like group_barrier_needed, but do not clobber the current state. */
6622 safe_group_barrier_needed (rtx insn)
6624 int saved_first_instruction;
6627 saved_first_instruction = first_instruction;
6628 in_safe_group_barrier = 1;
6630 t = group_barrier_needed (insn);
6632 first_instruction = saved_first_instruction;
6633 in_safe_group_barrier = 0;
6638 /* Scan the current function and insert stop bits as necessary to
6639 eliminate dependencies. This function assumes that a final
6640 instruction scheduling pass has been run which has already
6641 inserted most of the necessary stop bits. This function only
6642 inserts new ones at basic block boundaries, since these are
6643 invisible to the scheduler. */
6646 emit_insn_group_barriers (FILE *dump)
6650 int insns_since_last_label = 0;
6652 init_insn_group_barriers ();
6654 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6656 if (GET_CODE (insn) == CODE_LABEL)
6658 if (insns_since_last_label)
6660 insns_since_last_label = 0;
6662 else if (GET_CODE (insn) == NOTE
6663 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6665 if (insns_since_last_label)
6667 insns_since_last_label = 0;
6669 else if (GET_CODE (insn) == INSN
6670 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6671 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6673 init_insn_group_barriers ();
6676 else if (NONDEBUG_INSN_P (insn))
6678 insns_since_last_label = 1;
6680 if (group_barrier_needed (insn))
6685 fprintf (dump, "Emitting stop before label %d\n",
6686 INSN_UID (last_label));
6687 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6690 init_insn_group_barriers ();
/* Like emit_insn_group_barriers, but used when a final scheduling pass
   has not been run.  This function has to emit all necessary group
   barriers.  */
6702 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6706 init_insn_group_barriers ();
6708 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6710 if (GET_CODE (insn) == BARRIER)
6712 rtx last = prev_active_insn (insn);
6716 if (GET_CODE (last) == JUMP_INSN
6717 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6718 last = prev_active_insn (last);
6719 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6720 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6722 init_insn_group_barriers ();
6724 else if (NONDEBUG_INSN_P (insn))
6726 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6727 init_insn_group_barriers ();
6728 else if (group_barrier_needed (insn))
6730 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6731 init_insn_group_barriers ();
6732 group_barrier_needed (insn);
6740 /* Instruction scheduling support. */
6742 #define NR_BUNDLES 10
6744 /* A list of names of all available bundles. */
6746 static const char *bundle_name [NR_BUNDLES] =
6752 #if NR_BUNDLES == 10
6762 /* Nonzero if we should insert stop bits into the schedule. */
6764 int ia64_final_schedule = 0;
6766 /* Codes of the corresponding queried units: */
6768 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6769 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6771 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6772 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6774 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
/* The following variable holds an insn group barrier.  */
6778 static rtx dfa_stop_insn;
/* The following variable holds the last issued insn.  */
6782 static rtx last_scheduled_insn;
/* The following variable is a pointer to a DFA state used as a
   temporary variable.  */
6787 static state_t temp_dfa_state = NULL;
/* The following variable is the DFA state after issuing the last
   insn.  */
6792 static state_t prev_cycle_state = NULL;
/* Each element of the following array is TRUE if the corresponding
   insn requires a stop bit to be added before it.  */
6797 static char *stops_p = NULL;
/* The following variable is used to set up the array mentioned above.  */
6801 static int stop_before_p = 0;
/* The following variable is the allocated length of the `stops_p'
   array.  */
6806 static int clocks_length;
6808 /* The following variable value is number of data speculations in progress. */
6809 static int pending_data_specs = 0;
/* Number of memory references on the current and three future processor
   cycles.  */
6812 static char mem_ops_in_group[4];
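/* (Worked example, derived from record_memory_reference below: the
   array is a 4-entry ring indexed by current_cycle % 4.  A load on
   cycle N is counted in slot N % 4, while a store is charged three
   cycles later, in slot (N + 3) % 4; e.g. a store issued on cycle 5 is
   counted in slot (5 + 3) % 4 == 0, i.e. as a reference on cycle 8.)  */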
/* The current processor cycle number (from the scheduler's point of
   view).  */
6815 static int current_cycle;
6817 static rtx ia64_single_set (rtx);
6818 static void ia64_emit_insn_before (rtx, rtx);
6820 /* Map a bundle number to its pseudo-op. */
6823 get_bundle_name (int b)
6825 return bundle_name[b];
6829 /* Return the maximum number of instructions a cpu can issue. */
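/* (Background note: Itanium 2 can issue two three-insn bundles, i.e.
   six instructions, per clock, which is the rate returned here.)  */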
6832 ia64_issue_rate (void)
6837 /* Helper function - like single_set, but look inside COND_EXEC. */
6840 ia64_single_set (rtx insn)
6842 rtx x = PATTERN (insn), ret;
6843 if (GET_CODE (x) == COND_EXEC)
6844 x = COND_EXEC_CODE (x);
6845 if (GET_CODE (x) == SET)
/* Special-case prologue_allocate_stack and epilogue_deallocate_stack.
   Although they are not a classical single set, the second set is
   there just to protect the first one from moving past FP-relative
   stack accesses.  */
6851 switch (recog_memoized (insn))
6853 case CODE_FOR_prologue_allocate_stack:
6854 case CODE_FOR_epilogue_deallocate_stack:
6855 ret = XVECEXP (x, 0, 0);
6859 ret = single_set_2 (insn, x);
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE between INSN
   and DEP_INSN.  COST is the current cost; DW is the dependency
   weakness.  */
6870 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6872 enum reg_note dep_type = (enum reg_note) dep_type1;
6873 enum attr_itanium_class dep_class;
6874 enum attr_itanium_class insn_class;
6876 insn_class = ia64_safe_itanium_class (insn);
6877 dep_class = ia64_safe_itanium_class (dep_insn);
/* Treat true memory dependencies separately.  Ignore an apparent true
   dependence between a store and a call (the call contains a MEM of a
   SYMBOL_REF).  */
6881 if (dep_type == REG_DEP_TRUE
6882 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6883 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6886 if (dw == MIN_DEP_WEAK)
/* The store and load are likely to alias; use a higher cost to avoid
   a stall.  */
6888 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6889 else if (dw > MIN_DEP_WEAK)
6891 /* Store and load are less likely to alias. */
6892 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
/* Assume there will be no cache conflict for floating-point data.
   For integer data, the L1 conflict penalty is huge (17 cycles), so
   we never assume it will not cause a conflict.  */
6901 if (dep_type != REG_DEP_OUTPUT)
6904 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6905 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6911 /* Like emit_insn_before, but skip cycle_display notes.
6912 ??? When cycle display notes are implemented, update this. */
6915 ia64_emit_insn_before (rtx insn, rtx before)
6917 emit_insn_before (insn, before);
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because
   that decreases latency time for Itanium 1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
6926 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6928 rtx insn, next, next_tail;
6930 /* Before reload, which_alternative is not set, which means that
6931 ia64_safe_itanium_class will produce wrong results for (at least)
6932 move instructions. */
6933 if (!reload_completed)
6936 next_tail = NEXT_INSN (tail);
6937 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6940 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6942 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6944 sd_iterator_def sd_it;
6946 bool has_mem_op_consumer_p = false;
6948 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6950 enum attr_itanium_class c;
6952 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6955 next = DEP_CON (dep);
6956 c = ia64_safe_itanium_class (next);
6957 if ((c == ITANIUM_CLASS_ST
6958 || c == ITANIUM_CLASS_STF)
6959 && ia64_st_address_bypass_p (insn, next))
6961 has_mem_op_consumer_p = true;
6964 else if ((c == ITANIUM_CLASS_LD
6965 || c == ITANIUM_CLASS_FLD
6966 || c == ITANIUM_CLASS_FLDP)
6967 && ia64_ld_address_bypass_p (insn, next))
6969 has_mem_op_consumer_p = true;
6974 insn->call = has_mem_op_consumer_p;
6978 /* We're beginning a new block. Initialize data structures as necessary. */
6981 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6982 int sched_verbose ATTRIBUTE_UNUSED,
6983 int max_ready ATTRIBUTE_UNUSED)
6985 #ifdef ENABLE_CHECKING
6988 if (!sel_sched_p () && reload_completed)
6989 for (insn = NEXT_INSN (current_sched_info->prev_head);
6990 insn != current_sched_info->next_tail;
6991 insn = NEXT_INSN (insn))
6992 gcc_assert (!SCHED_GROUP_P (insn));
6994 last_scheduled_insn = NULL_RTX;
6995 init_insn_group_barriers ();
6998 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7001 /* We're beginning a scheduling pass. Check assertion. */
7004 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7005 int sched_verbose ATTRIBUTE_UNUSED,
7006 int max_ready ATTRIBUTE_UNUSED)
7008 gcc_assert (pending_data_specs == 0);
/* The scheduling pass is now finished.  Free/reset the static
   variable.  */
7013 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7014 int sched_verbose ATTRIBUTE_UNUSED)
7016 gcc_assert (pending_data_specs == 0);
7019 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7020 speculation check), FALSE otherwise. */
7022 is_load_p (rtx insn)
7024 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7027 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7028 && get_attr_check_load (insn) == CHECK_LOAD_NO);
/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP
   global array (taking into account the 3-cycle cache reference
   postponing for stores; see the Intel Itanium 2 Reference Manual
   for Software Development and Optimization).  */
7036 record_memory_reference (rtx insn)
7038 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;

    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;

    default:
      break;
    }
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
7057 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7058 int *pn_ready, int clock_var,
7062 int n_ready = *pn_ready;
7063 rtx *e_ready = ready + n_ready;
7067 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7069 if (reorder_type == 0)
7071 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7073 for (insnp = ready; insnp < e_ready; insnp++)
7074 if (insnp < e_ready)
7077 enum attr_type t = ia64_safe_type (insn);
7078 if (t == TYPE_UNKNOWN)
7080 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7081 || asm_noperands (PATTERN (insn)) >= 0)
7083 rtx lowest = ready[n_asms];
7084 ready[n_asms] = insn;
7090 rtx highest = ready[n_ready - 1];
7091 ready[n_ready - 1] = insn;
7098 if (n_asms < n_ready)
7100 /* Some normal insns to process. Skip the asms. */
7104 else if (n_ready > 0)
7108 if (ia64_final_schedule)
7111 int nr_need_stop = 0;
7113 for (insnp = ready; insnp < e_ready; insnp++)
7114 if (safe_group_barrier_needed (*insnp))
7117 if (reorder_type == 1 && n_ready == nr_need_stop)
7119 if (reorder_type == 0)
/* Move down everything that needs a stop bit, preserving
   relative order.  */
7124 while (insnp-- > ready + deleted)
7125 while (insnp >= ready + deleted)
7128 if (! safe_group_barrier_needed (insn))
7130 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7138 current_cycle = clock_var;
7139 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7144 /* Move down loads/stores, preserving relative order. */
7145 while (insnp-- > ready + moved)
7146 while (insnp >= ready + moved)
7149 if (! is_load_p (insn))
7151 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */
7166 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7169 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7170 pn_ready, clock_var, 0);
7173 /* Like ia64_sched_reorder, but called after issuing each insn.
7174 Override the default sort algorithm to better slot instructions. */
7177 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7178 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7179 int *pn_ready, int clock_var)
7181 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7185 /* We are about to issue INSN. Return the number of insns left on the
7186 ready queue that can be issued this cycle. */
7189 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7190 int sched_verbose ATTRIBUTE_UNUSED,
7191 rtx insn ATTRIBUTE_UNUSED,
7192 int can_issue_more ATTRIBUTE_UNUSED)
7194 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
/* Modulo scheduling does not extend h_i_d when emitting
   new instructions.  Don't use h_i_d if we don't have to.  */
7198 if (DONE_SPEC (insn) & BEGIN_DATA)
7199 pending_data_specs++;
7200 if (CHECK_SPEC (insn) & BEGIN_DATA)
7201 pending_data_specs--;
7204 if (DEBUG_INSN_P (insn))
7207 last_scheduled_insn = insn;
7208 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7209 if (reload_completed)
7211 int needed = group_barrier_needed (insn);
7213 gcc_assert (!needed);
7214 if (GET_CODE (insn) == CALL_INSN)
7215 init_insn_group_barriers ();
7216 stops_p [INSN_UID (insn)] = stop_before_p;
7219 record_memory_reference (insn);
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */
7228 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7230 gcc_assert (insn && INSN_P (insn));
7231 return ((!reload_completed
7232 || !safe_group_barrier_needed (insn))
7233 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7234 && (!mflag_sched_mem_insns_hard_limit
7235 || !is_load_p (insn)
7236 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */
7243 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7245 gcc_assert (insn && INSN_P (insn));
/* The size of the ALAT is 32.  Since we perform conservative data
   speculation, we keep the ALAT half empty.  */
7248 return (pending_data_specs < 16
7249 || !(TODO_SPEC (insn) & BEGIN_DATA));
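/* (Illustration: with the ALAT's 32 entries, capping in-flight data
   speculations at 16 keeps it at most half full, making it unlikely
   that one ld.a evicts another speculative entry before its check
   executes.)  */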
/* The following variable is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */
7256 static rtx dfa_pre_cycle_insn;
7258 /* Returns 1 when a meaningful insn was scheduled between the last group
7259 barrier and LAST. */
7261 scheduled_good_insn (rtx last)
7263 if (last && recog_memoized (last) >= 0)
7267 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7268 && !stops_p[INSN_UID (last)];
7269 last = PREV_INSN (last))
7270 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7271 the ebb we're scheduling. */
7272 if (INSN_P (last) && recog_memoized (last) >= 0)
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
   should not be sorted on the next clock start.  */
7283 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7284 int clock, int *sort_p)
7286 gcc_assert (insn && INSN_P (insn));
7288 if (DEBUG_INSN_P (insn))
/* When a group barrier is needed for INSN, last_scheduled_insn
   should be set.  */
7293 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7294 || last_scheduled_insn);
7296 if ((reload_completed
7297 && (safe_group_barrier_needed (insn)
7298 || (mflag_sched_stop_bits_after_every_cycle
7299 && last_clock != clock
7300 && last_scheduled_insn
7301 && scheduled_good_insn (last_scheduled_insn))))
7302 || (last_scheduled_insn
7303 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7304 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7305 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7307 init_insn_group_barriers ();
7309 if (verbose && dump)
7310 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7311 last_clock == clock ? " + cycle advance" : "");
7314 current_cycle = clock;
7315 mem_ops_in_group[current_cycle % 4] = 0;
7317 if (last_clock == clock)
7319 state_transition (curr_state, dfa_stop_insn);
7320 if (TARGET_EARLY_STOP_BITS)
7321 *sort_p = (last_scheduled_insn == NULL_RTX
7322 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7328 if (last_scheduled_insn)
7330 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7331 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7332 state_reset (curr_state);
7335 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7336 state_transition (curr_state, dfa_stop_insn);
7337 state_transition (curr_state, dfa_pre_cycle_insn);
7338 state_transition (curr_state, NULL);
7345 /* Implement targetm.sched.h_i_d_extended hook.
7346 Extend internal data structures. */
7348 ia64_h_i_d_extended (void)
7350 if (stops_p != NULL)
7352 int new_clocks_length = get_max_uid () * 3 / 2;
7353 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7354 clocks_length = new_clocks_length;
7359 /* This structure describes the data used by the backend to guide scheduling.
7360 When the current scheduling point is switched, this data should be saved
7361 and restored later, if the scheduler returns to this point. */
7362 struct _ia64_sched_context
7364 state_t prev_cycle_state;
7365 rtx last_scheduled_insn;
7366 struct reg_write_state rws_sum[NUM_REGS];
7367 struct reg_write_state rws_insn[NUM_REGS];
7368 int first_instruction;
7369 int pending_data_specs;
7371 char mem_ops_in_group[4];
7373 typedef struct _ia64_sched_context *ia64_sched_context_t;
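/* (Lifecycle sketch, inferred from the comment above rather than
   stated anywhere in this file: the selective scheduler allocates and
   initializes a context when it records a scheduling point, calls
   ia64_set_sched_context to resume from that point later, and clears
   and then frees the context once the point is abandoned.)  */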
7375 /* Allocates a scheduling context. */
7377 ia64_alloc_sched_context (void)
7379 return xmalloc (sizeof (struct _ia64_sched_context));
7382 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7383 the global context otherwise. */
7385 ia64_init_sched_context (void *_sc, bool clean_p)
7387 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7389 sc->prev_cycle_state = xmalloc (dfa_state_size);
7392 state_reset (sc->prev_cycle_state);
7393 sc->last_scheduled_insn = NULL_RTX;
7394 memset (sc->rws_sum, 0, sizeof (rws_sum));
7395 memset (sc->rws_insn, 0, sizeof (rws_insn));
7396 sc->first_instruction = 1;
7397 sc->pending_data_specs = 0;
7398 sc->current_cycle = 0;
7399 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7403 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7404 sc->last_scheduled_insn = last_scheduled_insn;
7405 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7406 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7407 sc->first_instruction = first_instruction;
7408 sc->pending_data_specs = pending_data_specs;
7409 sc->current_cycle = current_cycle;
7410 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7414 /* Sets the global scheduling context to the one pointed to by _SC. */
7416 ia64_set_sched_context (void *_sc)
7418 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7420 gcc_assert (sc != NULL);
7422 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7423 last_scheduled_insn = sc->last_scheduled_insn;
7424 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7425 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7426 first_instruction = sc->first_instruction;
7427 pending_data_specs = sc->pending_data_specs;
7428 current_cycle = sc->current_cycle;
7429 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7432 /* Clears the data in the _SC scheduling context. */
7434 ia64_clear_sched_context (void *_sc)
7436 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7438 free (sc->prev_cycle_state);
7439 sc->prev_cycle_state = NULL;
7442 /* Frees the _SC scheduling context. */
7444 ia64_free_sched_context (void *_sc)
7446 gcc_assert (_sc != NULL);
7451 typedef rtx (* gen_func_t) (rtx, rtx);
7453 /* Return a function that will generate a load of mode MODE_NO
7454 with speculation types TS. */
7456 get_spec_load_gen_function (ds_t ts, int mode_no)
7458 static gen_func_t gen_ld_[] = {
7468 gen_zero_extendqidi2,
7469 gen_zero_extendhidi2,
7470 gen_zero_extendsidi2,
7473 static gen_func_t gen_ld_a[] = {
7483 gen_zero_extendqidi2_advanced,
7484 gen_zero_extendhidi2_advanced,
7485 gen_zero_extendsidi2_advanced,
7487 static gen_func_t gen_ld_s[] = {
7488 gen_movbi_speculative,
7489 gen_movqi_speculative,
7490 gen_movhi_speculative,
7491 gen_movsi_speculative,
7492 gen_movdi_speculative,
7493 gen_movsf_speculative,
7494 gen_movdf_speculative,
7495 gen_movxf_speculative,
7496 gen_movti_speculative,
7497 gen_zero_extendqidi2_speculative,
7498 gen_zero_extendhidi2_speculative,
7499 gen_zero_extendsidi2_speculative,
7501 static gen_func_t gen_ld_sa[] = {
7502 gen_movbi_speculative_advanced,
7503 gen_movqi_speculative_advanced,
7504 gen_movhi_speculative_advanced,
7505 gen_movsi_speculative_advanced,
7506 gen_movdi_speculative_advanced,
7507 gen_movsf_speculative_advanced,
7508 gen_movdf_speculative_advanced,
7509 gen_movxf_speculative_advanced,
7510 gen_movti_speculative_advanced,
7511 gen_zero_extendqidi2_speculative_advanced,
7512 gen_zero_extendhidi2_speculative_advanced,
7513 gen_zero_extendsidi2_speculative_advanced,
7515 static gen_func_t gen_ld_s_a[] = {
7516 gen_movbi_speculative_a,
7517 gen_movqi_speculative_a,
7518 gen_movhi_speculative_a,
7519 gen_movsi_speculative_a,
7520 gen_movdi_speculative_a,
7521 gen_movsf_speculative_a,
7522 gen_movdf_speculative_a,
7523 gen_movxf_speculative_a,
7524 gen_movti_speculative_a,
7525 gen_zero_extendqidi2_speculative_a,
7526 gen_zero_extendhidi2_speculative_a,
7527 gen_zero_extendsidi2_speculative_a,
7532 if (ts & BEGIN_DATA)
7534 if (ts & BEGIN_CONTROL)
7539 else if (ts & BEGIN_CONTROL)
7541 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7542 || ia64_needs_block_p (ts))
7545 gen_ld = gen_ld_s_a;
7552 return gen_ld[mode_no];
/* Constants that help map `enum machine_mode' to int.  */
7558 SPEC_MODE_INVALID = -1,
7559 SPEC_MODE_FIRST = 0,
7560 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7561 SPEC_MODE_FOR_EXTEND_LAST = 3,
7567 /* Offset to reach ZERO_EXTEND patterns. */
7568 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
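/* (Worked example, assuming SPEC_MODE_LAST is 8 to match the 12-entry
   gen_ld_* tables above: the offset is 8 - 1 + 1 == 8, so a
   zero-extending QImode load (mode index 1) selects entry
   1 + 8 == 9, i.e. the gen_zero_extendqidi2* pattern.)  */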
/* Return the index of MODE.  */
7573 ia64_mode_to_int (enum machine_mode mode)
7577 case BImode: return 0; /* SPEC_MODE_FIRST */
7578 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7579 case HImode: return 2;
7580 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7581 case DImode: return 4;
7582 case SFmode: return 5;
7583 case DFmode: return 6;
7584 case XFmode: return 7;
/* ??? This mode needs testing.  Bypasses for the ldfp8 instruction
   are not mentioned in itanium[12].md.  The predicate
   fp_register_operand also needs to be defined.  Bottom line: better
   to disable it for now.  */
7589 return SPEC_MODE_INVALID;
7590 default: return SPEC_MODE_INVALID;
7594 /* Provide information about speculation capabilities. */
7596 ia64_set_sched_flags (spec_info_t spec_info)
7598 unsigned int *flags = &(current_sched_info->flags);
7600 if (*flags & SCHED_RGN
7601 || *flags & SCHED_EBB
7602 || *flags & SEL_SCHED)
7606 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7607 || (mflag_sched_ar_data_spec && reload_completed))
7612 && ((mflag_sched_br_in_data_spec && !reload_completed)
7613 || (mflag_sched_ar_in_data_spec && reload_completed)))
7617 if (mflag_sched_control_spec
7619 || reload_completed))
7621 mask |= BEGIN_CONTROL;
7623 if (!sel_sched_p () && mflag_sched_in_control_spec)
7624 mask |= BE_IN_CONTROL;
7627 spec_info->mask = mask;
7631 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7633 if (mask & BE_IN_SPEC)
7636 spec_info->flags = 0;
7638 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7639 spec_info->flags |= PREFER_NON_DATA_SPEC;
7641 if (mask & CONTROL_SPEC)
7643 if (mflag_sched_prefer_non_control_spec_insns)
7644 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7646 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7647 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7650 if (sched_verbose >= 1)
7651 spec_info->dump = sched_dump;
7653 spec_info->dump = 0;
7655 if (mflag_sched_count_spec_in_critical_path)
7656 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7660 spec_info->mask = 0;
/* If INSN is an appropriate load, return its mode.
   Return -1 otherwise.  */
7666 get_mode_no_for_insn (rtx insn)
7668 rtx reg, mem, mode_rtx;
7672 extract_insn_cached (insn);
7674 /* We use WHICH_ALTERNATIVE only after reload. This will
7675 guarantee that reload won't touch a speculative insn. */
7677 if (recog_data.n_operands != 2)
7680 reg = recog_data.operand[0];
7681 mem = recog_data.operand[1];
/* We should use the MEM's mode since the REG's mode, in the presence
   of a ZERO_EXTEND, will always be DImode.  */
7685 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7686 /* Process non-speculative ld. */
7688 if (!reload_completed)
7690 /* Do not speculate into regs like ar.lc. */
7691 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7698 rtx mem_reg = XEXP (mem, 0);
7700 if (!REG_P (mem_reg))
7706 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7708 gcc_assert (REG_P (reg) && MEM_P (mem));
7714 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7715 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7716 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7717 /* Process speculative ld or ld.c. */
7719 gcc_assert (REG_P (reg) && MEM_P (mem));
7724 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7726 if (attr_class == ITANIUM_CLASS_CHK_A
7727 || attr_class == ITANIUM_CLASS_CHK_S_I
7728 || attr_class == ITANIUM_CLASS_CHK_S_F)
7735 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7737 if (mode_no == SPEC_MODE_INVALID)
7740 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7744 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7745 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7748 mode_no += SPEC_GEN_EXTEND_OFFSET;
7754 /* If X is an unspec part of a speculative load, return its code.
7755 Return -1 otherwise. */
7757 get_spec_unspec_code (const_rtx x)
7759 if (GET_CODE (x) != UNSPEC)
7781 /* Implement skip_rtx_p hook. */
7783 ia64_skip_rtx_p (const_rtx x)
7785 return get_spec_unspec_code (x) != -1;
7788 /* If INSN is a speculative load, return its UNSPEC code.
7789 Return -1 otherwise. */
7791 get_insn_spec_code (const_rtx insn)
7795 pat = PATTERN (insn);
7797 if (GET_CODE (pat) == COND_EXEC)
7798 pat = COND_EXEC_CODE (pat);
7800 if (GET_CODE (pat) != SET)
7803 reg = SET_DEST (pat);
7807 mem = SET_SRC (pat);
7808 if (GET_CODE (mem) == ZERO_EXTEND)
7809 mem = XEXP (mem, 0);
7811 return get_spec_unspec_code (mem);
7814 /* If INSN is a speculative load, return a ds with the speculation types.
7815 Otherwise [if INSN is a normal instruction] return 0. */
7817 ia64_get_insn_spec_ds (rtx insn)
7819 int code = get_insn_spec_code (insn);
7828 return BEGIN_CONTROL;
7831 return BEGIN_DATA | BEGIN_CONTROL;
/* If INSN is a speculative load, return a ds with the speculation types
   that will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
7842 ia64_get_insn_checked_ds (rtx insn)
7844 int code = get_insn_spec_code (insn);
7849 return BEGIN_DATA | BEGIN_CONTROL;
7852 return BEGIN_CONTROL;
7856 return BEGIN_DATA | BEGIN_CONTROL;
/* Generate the speculative pattern for INSN with speculation types TS
   and machine mode index MODE_NO.  */
7868 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7871 gen_func_t gen_load;
7873 gen_load = get_spec_load_gen_function (ts, mode_no);
7875 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7876 copy_rtx (recog_data.operand[1]));
7878 pat = PATTERN (insn);
7879 if (GET_CODE (pat) == COND_EXEC)
7880 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7887 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7888 ds_t ds ATTRIBUTE_UNUSED)
/* Implement targetm.sched.speculate_insn hook.
   Check if INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate the speculative pattern in NEW_PAT and return 1.
   If the current pattern of INSN already provides TS speculation,
   return 0.  */
7900 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7905 gcc_assert (!(ts & ~SPECULATIVE));
7907 if (ia64_spec_check_p (insn))
7910 if ((ts & BE_IN_SPEC)
7911 && !insn_can_be_in_speculative_p (insn, ts))
7914 mode_no = get_mode_no_for_insn (insn);
7916 if (mode_no != SPEC_MODE_INVALID)
7918 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7923 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
/* Return a function that will generate a check for speculation TS with
   mode MODE_NO.
   If a simple check is needed, pass true for SIMPLE_CHECK_P.
   If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
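/* (For example, following the selection logic below: a data-speculative
   load that can be checked with a check load gets the ld.c.clr or
   ld.c.nc form, one that cannot gets chk.a, and a control-speculative
   load that needs branchy recovery gets chk.s.)  */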
7937 get_spec_check_gen_function (ds_t ts, int mode_no,
7938 bool simple_check_p, bool clearing_check_p)
7940 static gen_func_t gen_ld_c_clr[] = {
7950 gen_zero_extendqidi2_clr,
7951 gen_zero_extendhidi2_clr,
7952 gen_zero_extendsidi2_clr,
7954 static gen_func_t gen_ld_c_nc[] = {
7964 gen_zero_extendqidi2_nc,
7965 gen_zero_extendhidi2_nc,
7966 gen_zero_extendsidi2_nc,
7968 static gen_func_t gen_chk_a_clr[] = {
7969 gen_advanced_load_check_clr_bi,
7970 gen_advanced_load_check_clr_qi,
7971 gen_advanced_load_check_clr_hi,
7972 gen_advanced_load_check_clr_si,
7973 gen_advanced_load_check_clr_di,
7974 gen_advanced_load_check_clr_sf,
7975 gen_advanced_load_check_clr_df,
7976 gen_advanced_load_check_clr_xf,
7977 gen_advanced_load_check_clr_ti,
7978 gen_advanced_load_check_clr_di,
7979 gen_advanced_load_check_clr_di,
7980 gen_advanced_load_check_clr_di,
7982 static gen_func_t gen_chk_a_nc[] = {
7983 gen_advanced_load_check_nc_bi,
7984 gen_advanced_load_check_nc_qi,
7985 gen_advanced_load_check_nc_hi,
7986 gen_advanced_load_check_nc_si,
7987 gen_advanced_load_check_nc_di,
7988 gen_advanced_load_check_nc_sf,
7989 gen_advanced_load_check_nc_df,
7990 gen_advanced_load_check_nc_xf,
7991 gen_advanced_load_check_nc_ti,
7992 gen_advanced_load_check_nc_di,
7993 gen_advanced_load_check_nc_di,
7994 gen_advanced_load_check_nc_di,
7996 static gen_func_t gen_chk_s[] = {
7997 gen_speculation_check_bi,
7998 gen_speculation_check_qi,
7999 gen_speculation_check_hi,
8000 gen_speculation_check_si,
8001 gen_speculation_check_di,
8002 gen_speculation_check_sf,
8003 gen_speculation_check_df,
8004 gen_speculation_check_xf,
8005 gen_speculation_check_ti,
8006 gen_speculation_check_di,
8007 gen_speculation_check_di,
8008 gen_speculation_check_di,
8011 gen_func_t *gen_check;
8013 if (ts & BEGIN_DATA)
/* We don't need recovery because even if this is an ld.sa, the ALAT
   entry will be allocated only if the NAT bit is set to zero.  So it
   is enough to use ld.c here.  */
8021 gcc_assert (mflag_sched_spec_ldc);
8023 if (clearing_check_p)
8024 gen_check = gen_ld_c_clr;
8026 gen_check = gen_ld_c_nc;
8030 if (clearing_check_p)
8031 gen_check = gen_chk_a_clr;
8033 gen_check = gen_chk_a_nc;
8036 else if (ts & BEGIN_CONTROL)
/* We might want to use ld.sa -> ld.c instead of
   ld.s -> chk.s.  */
8042 gcc_assert (!ia64_needs_block_p (ts));
8044 if (clearing_check_p)
8045 gen_check = gen_ld_c_clr;
8047 gen_check = gen_ld_c_nc;
8051 gen_check = gen_chk_s;
8057 gcc_assert (mode_no >= 0);
8058 return gen_check[mode_no];
/* Return nonzero if a check for speculation types TS needs a branchy
   recovery check.  */
8063 ia64_needs_block_p (ds_t ts)
8065 if (ts & BEGIN_DATA)
8066 return !mflag_sched_spec_ldc;
8068 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8070 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
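/* For illustration, an IA-64 assembly sketch (not taken from this
   file) of the two recovery styles:

     ld8.a     r14 = [r15]    // advanced (data-speculative) load
     ...
     ld8.c.clr r14 = [r15]    // simple check: reload if ALAT entry lost

   A branchy check instead uses chk.a/chk.s with a recovery label,
   e.g. "chk.a.clr r14, .Lrecover", where the recovery code re-executes
   the load and its dependent insns.  */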
/* Generate a recovery check for INSN.  If LABEL is nonzero, generate
   a branchy recovery check; otherwise, generate a simple check.  */
8077 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8079 rtx op1, pat, check_pat;
8080 gen_func_t gen_check;
8083 mode_no = get_mode_no_for_insn (insn);
8084 gcc_assert (mode_no >= 0);
8090 gcc_assert (!ia64_needs_block_p (ds));
8091 op1 = copy_rtx (recog_data.operand[1]);
8094 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8097 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8099 pat = PATTERN (insn);
8100 if (GET_CODE (pat) == COND_EXEC)
8101 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
/* Return nonzero if X is a branchy recovery check.  */
8109 ia64_spec_check_p (rtx x)
8112 if (GET_CODE (x) == COND_EXEC)
8113 x = COND_EXEC_CODE (x);
8114 if (GET_CODE (x) == SET)
8115 return ia64_spec_check_src_p (SET_SRC (x));
/* Return nonzero if SRC belongs to a recovery check.  */
8121 ia64_spec_check_src_p (rtx src)
8123 if (GET_CODE (src) == IF_THEN_ELSE)
8128 if (GET_CODE (t) == NE)
8132 if (GET_CODE (t) == UNSPEC)
8138 if (code == UNSPEC_LDCCLR
8139 || code == UNSPEC_LDCNC
8140 || code == UNSPEC_CHKACLR
8141 || code == UNSPEC_CHKANC
8142 || code == UNSPEC_CHKS)
8144 gcc_assert (code != 0);
/* The following page contains the abstract data `bundle states', which
   are used for bundling insns (inserting nops and generating
   templates).  */
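/* (Background: an IA-64 bundle is 128 bits wide -- a 5-bit template
   field plus three 41-bit instruction slots.  The template, e.g. .mfi
   or .mmi, fixes which execution unit each slot uses and where stop
   bits may appear, which is why bundling must both choose templates
   and insert nops.)  */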
/* The following structure describes the state of insn bundling.  */
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
8164 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
  /* number of nops before and after the insn */
8166 short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
                   insn, and so on) */
8169 int cost; /* cost of the state in cycles */
  int accumulated_insns_num; /* number of all previous insns including
                                nops; an L insn is counted as 2 insns */
8172 int branch_deviation; /* deviation of previous branches from 3rd slots */
8173 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8174 struct bundle_state *next; /* next state with the same insn_num */
8175 struct bundle_state *originator; /* originator (previous insn state) */
8176 /* All bundle states are in the following chain. */
8177 struct bundle_state *allocated_states_chain;
8178 /* The DFA State after issuing the insn and the nops. */
/* The following maps an insn number to the corresponding bundle
   state.  */
8184 static struct bundle_state **index_to_bundle_states;
/* The unique number of the next bundle state.  */
8188 static int bundle_states_num;
8190 /* All allocated bundle states are in the following chain. */
8192 static struct bundle_state *allocated_bundle_states_chain;
/* All allocated but not used bundle states are in the following
   chain.  */
8197 static struct bundle_state *free_bundle_state_chain;
8200 /* The following function returns a free bundle state. */
8202 static struct bundle_state *
8203 get_free_bundle_state (void)
8205 struct bundle_state *result;
8207 if (free_bundle_state_chain != NULL)
8209 result = free_bundle_state_chain;
8210 free_bundle_state_chain = result->next;
8214 result = XNEW (struct bundle_state);
8215 result->dfa_state = xmalloc (dfa_state_size);
8216 result->allocated_states_chain = allocated_bundle_states_chain;
8217 allocated_bundle_states_chain = result;
8219 result->unique_num = bundle_states_num++;
/* The following function frees the given bundle state.  */
8227 free_bundle_state (struct bundle_state *state)
8229 state->next = free_bundle_state_chain;
8230 free_bundle_state_chain = state;
8233 /* Start work with abstract data `bundle states'. */
8236 initiate_bundle_states (void)
8238 bundle_states_num = 0;
8239 free_bundle_state_chain = NULL;
8240 allocated_bundle_states_chain = NULL;
8243 /* Finish work with abstract data `bundle states'. */
8246 finish_bundle_states (void)
8248 struct bundle_state *curr_state, *next_state;
8250 for (curr_state = allocated_bundle_states_chain;
8252 curr_state = next_state)
8254 next_state = curr_state->allocated_states_chain;
8255 free (curr_state->dfa_state);
8260 /* Hash table of the bundle states. The key is dfa_state and insn_num
8261 of the bundle states. */
8263 static htab_t bundle_state_table;
/* The function returns a hash of BUNDLE_STATE.  */
8268 bundle_state_hash (const void *bundle_state)
8270 const struct bundle_state *const state
8271 = (const struct bundle_state *) bundle_state;
8274 for (result = i = 0; i < dfa_state_size; i++)
8275 result += (((unsigned char *) state->dfa_state) [i]
8276 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8277 return result + state->insn_num;
8280 /* The function returns nonzero if the bundle state keys are equal. */
8283 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8285 const struct bundle_state *const state1
8286 = (const struct bundle_state *) bundle_state_1;
8287 const struct bundle_state *const state2
8288 = (const struct bundle_state *) bundle_state_2;
8290 return (state1->insn_num == state2->insn_num
8291 && memcmp (state1->dfa_state, state2->dfa_state,
8292 dfa_state_size) == 0);
/* The function inserts BUNDLE_STATE into the hash table.  It returns
   nonzero if the bundle state has been inserted into the table.  The
   table keeps the best bundle state for each key.  */
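/* (The "best" ordering used by the comparison below prefers, in this
   order: lower cost, fewer accumulated insns, smaller branch
   deviation, and fewer stop bits in the middle of bundles.)  */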
8300 insert_bundle_state (struct bundle_state *bundle_state)
8304 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8305 if (*entry_ptr == NULL)
8307 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8308 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8309 *entry_ptr = (void *) bundle_state;
8312 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8313 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8314 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8315 > bundle_state->accumulated_insns_num
8316 || (((struct bundle_state *)
8317 *entry_ptr)->accumulated_insns_num
8318 == bundle_state->accumulated_insns_num
8319 && (((struct bundle_state *)
8320 *entry_ptr)->branch_deviation
8321 > bundle_state->branch_deviation
8322 || (((struct bundle_state *)
8323 *entry_ptr)->branch_deviation
8324 == bundle_state->branch_deviation
8325 && ((struct bundle_state *)
8326 *entry_ptr)->middle_bundle_stops
8327 > bundle_state->middle_bundle_stops))))))
8330 struct bundle_state temp;
8332 temp = *(struct bundle_state *) *entry_ptr;
8333 *(struct bundle_state *) *entry_ptr = *bundle_state;
8334 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8335 *bundle_state = temp;
8340 /* Start work with the hash table. */
8343 initiate_bundle_state_table (void)
8345 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8349 /* Finish work with the hash table. */
8352 finish_bundle_state_table (void)
8354 htab_delete (bundle_state_table);
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
8362 static rtx ia64_nop;
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8369 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8373 for (i = 0; i < nops_num; i++)
8374 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8376 free_bundle_state (curr_state);
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8387 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8389 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8391 free_bundle_state (curr_state);
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting from ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it succeeds, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */
8405 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8406 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8408 struct bundle_state *curr_state;
8410 curr_state = get_free_bundle_state ();
8411 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8412 curr_state->insn = insn;
8413 curr_state->insn_num = originator->insn_num + 1;
8414 curr_state->cost = originator->cost;
8415 curr_state->originator = originator;
8416 curr_state->before_nops_num = before_nops_num;
8417 curr_state->after_nops_num = 0;
8418 curr_state->accumulated_insns_num
8419 = originator->accumulated_insns_num + before_nops_num;
8420 curr_state->branch_deviation = originator->branch_deviation;
8421 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8423 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8425 gcc_assert (GET_MODE (insn) != TImode);
8426 if (!try_issue_nops (curr_state, before_nops_num))
8428 if (!try_issue_insn (curr_state, insn))
8430 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8431 if (curr_state->accumulated_insns_num % 3 != 0)
8432 curr_state->middle_bundle_stops++;
8433 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8434 && curr_state->accumulated_insns_num % 3 != 0)
8436 free_bundle_state (curr_state);
8440 else if (GET_MODE (insn) != TImode)
8442 if (!try_issue_nops (curr_state, before_nops_num))
8444 if (!try_issue_insn (curr_state, insn))
8446 curr_state->accumulated_insns_num++;
8447 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8448 && asm_noperands (PATTERN (insn)) < 0);
8450 if (ia64_safe_type (insn) == TYPE_L)
8451 curr_state->accumulated_insns_num++;
8455 /* If this is an insn that must be first in a group, then don't allow
8456 nops to be emitted before it. Currently, alloc is the only such
8457 supported instruction. */
8458 /* ??? The bundling automatons should handle this for us, but they do
8459 not yet have support for the first_insn attribute. */
8460 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8462 free_bundle_state (curr_state);
8466 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8467 state_transition (curr_state->dfa_state, NULL);
8469 if (!try_issue_nops (curr_state, before_nops_num))
8471 if (!try_issue_insn (curr_state, insn))
8473 curr_state->accumulated_insns_num++;
8474 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8475 || asm_noperands (PATTERN (insn)) >= 0)
8477 /* Finish bundle containing asm insn. */
8478 curr_state->after_nops_num
8479 = 3 - curr_state->accumulated_insns_num % 3;
8480 curr_state->accumulated_insns_num
8481 += 3 - curr_state->accumulated_insns_num % 3;
8483 else if (ia64_safe_type (insn) == TYPE_L)
8484 curr_state->accumulated_insns_num++;
8486 if (ia64_safe_type (insn) == TYPE_B)
8487 curr_state->branch_deviation
8488 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8489 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8491 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8494 struct bundle_state *curr_state1;
8495 struct bundle_state *allocated_states_chain;
8497 curr_state1 = get_free_bundle_state ();
8498 dfa_state = curr_state1->dfa_state;
8499 allocated_states_chain = curr_state1->allocated_states_chain;
8500 *curr_state1 = *curr_state;
8501 curr_state1->dfa_state = dfa_state;
8502 curr_state1->allocated_states_chain = allocated_states_chain;
8503 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8505 curr_state = curr_state1;
8507 if (!try_issue_nops (curr_state,
8508 3 - curr_state->accumulated_insns_num % 3))
8510 curr_state->after_nops_num
8511 = 3 - curr_state->accumulated_insns_num % 3;
8512 curr_state->accumulated_insns_num
8513 += 3 - curr_state->accumulated_insns_num % 3;
8515 if (!insert_bundle_state (curr_state))
8516 free_bundle_state (curr_state);
/* The following function returns the highest slot position reserved
   in the two-bundle window for the given STATE.  */
8524 get_max_pos (state_t state)
8526 if (cpu_unit_reservation_p (state, pos_6))
8528 else if (cpu_unit_reservation_p (state, pos_5))
8530 else if (cpu_unit_reservation_p (state, pos_4))
8532 else if (cpu_unit_reservation_p (state, pos_3))
8534 else if (cpu_unit_reservation_p (state, pos_2))
8536 else if (cpu_unit_reservation_p (state, pos_1))
/* The function returns the code of a possible template for a given
   position and state.  The function should be called only with
   position values of 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations also
   argue against the use of F-unit instructions unless they're really
   needed.  */
8553 get_template (state_t state, int pos)
8558 if (cpu_unit_reservation_p (state, _0mmi_))
8560 else if (cpu_unit_reservation_p (state, _0mii_))
8562 else if (cpu_unit_reservation_p (state, _0mmb_))
8564 else if (cpu_unit_reservation_p (state, _0mib_))
8566 else if (cpu_unit_reservation_p (state, _0mbb_))
8568 else if (cpu_unit_reservation_p (state, _0bbb_))
8570 else if (cpu_unit_reservation_p (state, _0mmf_))
8572 else if (cpu_unit_reservation_p (state, _0mfi_))
8574 else if (cpu_unit_reservation_p (state, _0mfb_))
8576 else if (cpu_unit_reservation_p (state, _0mlx_))
8581 if (cpu_unit_reservation_p (state, _1mmi_))
8583 else if (cpu_unit_reservation_p (state, _1mii_))
8585 else if (cpu_unit_reservation_p (state, _1mmb_))
8587 else if (cpu_unit_reservation_p (state, _1mib_))
8589 else if (cpu_unit_reservation_p (state, _1mbb_))
8591 else if (cpu_unit_reservation_p (state, _1bbb_))
8593 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8595 else if (cpu_unit_reservation_p (state, _1mfi_))
8597 else if (cpu_unit_reservation_p (state, _1mfb_))
8599 else if (cpu_unit_reservation_p (state, _1mlx_))
8608 /* True when INSN is important for bundling. */
8610 important_for_bundling_p (rtx insn)
8612 return (INSN_P (insn)
8613 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8614 && GET_CODE (PATTERN (insn)) != USE
8615 && GET_CODE (PATTERN (insn)) != CLOBBER);
/* The following function returns the next insn important for insn
   bundling that follows INSN and comes before TAIL.  */
8622 get_next_important_insn (rtx insn, rtx tail)
8624 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8625 if (important_for_bundling_p (insn))
8630 /* Add a bundle selector TEMPLATE0 before INSN. */
8633 ia64_add_bundle_selector_before (int template0, rtx insn)
8635 rtx b = gen_bundle_selector (GEN_INT (template0));
8637 ia64_emit_insn_before (b, insn);
8638 #if NR_BUNDLES == 10
8639 if ((template0 == 4 || template0 == 5)
8640 && ia64_except_unwind_info () == UI_TARGET)
8643 rtx note = NULL_RTX;
/* In .mbb and .bbb bundles, check whether the CALL_INSN is in the
   first or second slot.  If it is and has a REG_EH_REGION note, copy
   the note to the following nops: br.call sets rp to the address of
   the following bundle, so an EH region end must be on a bundle
   boundary.  */
8650 insn = PREV_INSN (insn);
8651 for (i = 0; i < 3; i++)
8654 insn = next_active_insn (insn);
8655 while (GET_CODE (insn) == INSN
8656 && get_attr_empty (insn) == EMPTY_YES);
8657 if (GET_CODE (insn) == CALL_INSN)
8658 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8663 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8664 || code == CODE_FOR_nop_b);
8665 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8668 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
/* The following function does insn bundling.  Bundling means inserting
   templates and nop insns to fit insn groups into permitted templates.
   Instruction scheduling uses an NDFA (non-deterministic finite
   automaton) encoding information about the templates and the inserted
   nops.  The nondeterminism of the automaton permits following all
   possible insn sequences very quickly.
   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).
   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.
   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn of the chosen bundle
   states.
   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
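/* Illustrative pseudo-code for the forward pass (a sketch; the real
   loop is in bundling () below):

     for each important insn I in the EBB
       for each state S in index_to_bundle_states[insn_num - 1]
         for nops_num in { 2, 1, 0 }   // 2 only for F, B and L insns
           issue_nops_and_insn (S, nops_num, I, bundle_end_p,
                                only_bundle_end_p);

   States reaching the same DFA state and insn number are merged by
   insert_bundle_state, which keeps only the best one; this is what
   keeps the number of explored states manageable.  */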
8713 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8715 struct bundle_state *curr_state, *next_state, *best_state;
8716 rtx insn, next_insn;
8718 int i, bundle_end_p, only_bundle_end_p, asm_p;
8719 int pos = 0, max_pos, template0, template1;
8722 enum attr_type type;
8725 /* Count insns in the EBB. */
8726 for (insn = NEXT_INSN (prev_head_insn);
8727 insn && insn != tail;
8728 insn = NEXT_INSN (insn))
8734 dfa_clean_insn_cache ();
8735 initiate_bundle_state_table ();
8736 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8737 /* First (forward) pass -- generation of bundle states. */
8738 curr_state = get_free_bundle_state ();
8739 curr_state->insn = NULL;
8740 curr_state->before_nops_num = 0;
8741 curr_state->after_nops_num = 0;
8742 curr_state->insn_num = 0;
8743 curr_state->cost = 0;
8744 curr_state->accumulated_insns_num = 0;
8745 curr_state->branch_deviation = 0;
8746 curr_state->middle_bundle_stops = 0;
8747 curr_state->next = NULL;
8748 curr_state->originator = NULL;
8749 state_reset (curr_state->dfa_state);
8750 index_to_bundle_states [0] = curr_state;
8752 /* Shift the cycle mark if it is put on an insn which could be ignored. */
8753 for (insn = NEXT_INSN (prev_head_insn);
8755 insn = NEXT_INSN (insn))
8757 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8758 || GET_CODE (PATTERN (insn)) == USE
8759 || GET_CODE (PATTERN (insn)) == CLOBBER)
8760 && GET_MODE (insn) == TImode)
8762 PUT_MODE (insn, VOIDmode);
8763 for (next_insn = NEXT_INSN (insn);
8765 next_insn = NEXT_INSN (next_insn))
8766 if (INSN_P (next_insn)
8767 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8768 && GET_CODE (PATTERN (next_insn)) != USE
8769 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8770 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8772 PUT_MODE (next_insn, TImode);
8776 /* Forward pass: generation of bundle states. */
8777 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8781 gcc_assert (INSN_P (insn)
8782 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8783 && GET_CODE (PATTERN (insn)) != USE
8784 && GET_CODE (PATTERN (insn)) != CLOBBER);
8785 type = ia64_safe_type (insn);
8786 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8788 index_to_bundle_states [insn_num] = NULL;
8789 for (curr_state = index_to_bundle_states [insn_num - 1];
8791 curr_state = next_state)
8793 pos = curr_state->accumulated_insns_num % 3;
8794 next_state = curr_state->next;
8795 /* We must fill up the current bundle in order to start a
8796 subsequent asm insn in a new bundle. An asm insn is always
8797 placed in a separate bundle. */
8799 = (next_insn != NULL_RTX
8800 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8801 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8802 /* We may fill up the current bundle if it is the cycle end
8803 without a group barrier. */
8805 = (only_bundle_end_p || next_insn == NULL_RTX
8806 || (GET_MODE (next_insn) == TImode
8807 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8808 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8810 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8812 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8814 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8817 gcc_assert (index_to_bundle_states [insn_num]);
8818 for (curr_state = index_to_bundle_states [insn_num];
8820 curr_state = curr_state->next)
8821 if (verbose >= 2 && dump)
8823 /* This structure is taken from the generated code of the
8824 pipeline hazard recognizer (see file insn-attrtab.c).
8825 Please don't forget to change the structure if a new
8826 automaton is added to the .md file. */
8829 unsigned short one_automaton_state;
8830 unsigned short oneb_automaton_state;
8831 unsigned short two_automaton_state;
8832 unsigned short twob_automaton_state;
8837 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8838 curr_state->unique_num,
8839 (curr_state->originator == NULL
8840 ? -1 : curr_state->originator->unique_num),
8842 curr_state->before_nops_num, curr_state->after_nops_num,
8843 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8844 curr_state->middle_bundle_stops,
8845 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8850 /* We should find a solution because the 2nd insn scheduling has
8851 found one. */
8852 gcc_assert (index_to_bundle_states [insn_num]);
8853 /* Find a state corresponding to the best insn sequence. */
8855 for (curr_state = index_to_bundle_states [insn_num];
8857 curr_state = curr_state->next)
8858 /* We are just looking at the states with a fully filled-up last
8859 bundle. First we prefer insn sequences with minimal cost, then
8860 those with the fewest inserted nops, and finally those with
8861 branch insns placed in the 3rd slots. */
8862 if (curr_state->accumulated_insns_num % 3 == 0
8863 && (best_state == NULL || best_state->cost > curr_state->cost
8864 || (best_state->cost == curr_state->cost
8865 && (curr_state->accumulated_insns_num
8866 < best_state->accumulated_insns_num
8867 || (curr_state->accumulated_insns_num
8868 == best_state->accumulated_insns_num
8869 && (curr_state->branch_deviation
8870 < best_state->branch_deviation
8871 || (curr_state->branch_deviation
8872 == best_state->branch_deviation
8873 && curr_state->middle_bundle_stops
8874 < best_state->middle_bundle_stops)))))))
8875 best_state = curr_state;
8876 /* Second (backward) pass: adding nops and templates. */
8877 gcc_assert (best_state);
8878 insn_num = best_state->before_nops_num;
8879 template0 = template1 = -1;
8880 for (curr_state = best_state;
8881 curr_state->originator != NULL;
8882 curr_state = curr_state->originator)
8884 insn = curr_state->insn;
8885 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8886 || asm_noperands (PATTERN (insn)) >= 0);
8888 if (verbose >= 2 && dump)
8892 unsigned short one_automaton_state;
8893 unsigned short oneb_automaton_state;
8894 unsigned short two_automaton_state;
8895 unsigned short twob_automaton_state;
8900 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8901 curr_state->unique_num,
8902 (curr_state->originator == NULL
8903 ? -1 : curr_state->originator->unique_num),
8905 curr_state->before_nops_num, curr_state->after_nops_num,
8906 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8907 curr_state->middle_bundle_stops,
8908 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8911 /* Find the position in the current bundle window. The window can
8912 contain at most two bundles. A two-bundle window means that
8913 the processor will make two bundle rotations. */
8914 max_pos = get_max_pos (curr_state->dfa_state);
8916 /* The following (negative template number) means that the
8917 processor did one bundle rotation. */
8918 || (max_pos == 3 && template0 < 0))
8920 /* We are at the end of the window -- find template(s) for
8921 its bundle(s). */
8924 template0 = get_template (curr_state->dfa_state, 3);
8927 template1 = get_template (curr_state->dfa_state, 3);
8928 template0 = get_template (curr_state->dfa_state, 6);
8931 if (max_pos > 3 && template1 < 0)
8932 /* This may happen when we have a stop inside a bundle. */
8934 gcc_assert (pos <= 3);
8935 template1 = get_template (curr_state->dfa_state, 3);
8939 /* Emit nops after the current insn. */
8940 for (i = 0; i < curr_state->after_nops_num; i++)
8943 emit_insn_after (nop, insn);
8945 gcc_assert (pos >= 0);
8948 /* We are at the start of a bundle: emit the template
8949 (it should be defined). */
8950 gcc_assert (template0 >= 0);
8951 ia64_add_bundle_selector_before (template0, nop);
8952 /* If we have a two-bundle window, we make one bundle
8953 rotation. Otherwise template0 will be undefined
8954 (a negative value). */
8955 template0 = template1;
8959 /* Move the position backward in the window. A group barrier has
8960 no slot. An asm insn takes a whole bundle. */
8961 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8962 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8963 && asm_noperands (PATTERN (insn)) < 0)
8965 /* Long insn takes 2 slots. */
8966 if (ia64_safe_type (insn) == TYPE_L)
8968 gcc_assert (pos >= 0);
8970 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8971 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8972 && asm_noperands (PATTERN (insn)) < 0)
8974 /* The current insn is at the bundle start: emit the
8975 template. */
8976 gcc_assert (template0 >= 0);
8977 ia64_add_bundle_selector_before (template0, insn);
8978 b = PREV_INSN (insn);
8980 /* See comment above in the analogous place for emitting nops
8981 after the insn. */
8982 template0 = template1;
8985 /* Emit nops before the current insn. */
8986 for (i = 0; i < curr_state->before_nops_num; i++)
8989 ia64_emit_insn_before (nop, insn);
8990 nop = PREV_INSN (insn);
8993 gcc_assert (pos >= 0);
8996 /* See comment above in the analogous place for emitting nops
8997 after the insn. */
8998 gcc_assert (template0 >= 0);
8999 ia64_add_bundle_selector_before (template0, insn);
9000 b = PREV_INSN (insn);
9002 template0 = template1;
9008 #ifdef ENABLE_CHECKING
9010 /* Check that middle_bundle_stops was calculated correctly. */
9011 int num = best_state->middle_bundle_stops;
9012 bool start_bundle = true, end_bundle = false;
9014 for (insn = NEXT_INSN (prev_head_insn);
9015 insn && insn != tail;
9016 insn = NEXT_INSN (insn))
9020 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9021 start_bundle = true;
9026 for (next_insn = NEXT_INSN (insn);
9027 next_insn && next_insn != tail;
9028 next_insn = NEXT_INSN (next_insn))
9029 if (INSN_P (next_insn)
9030 && (ia64_safe_itanium_class (next_insn)
9031 != ITANIUM_CLASS_IGNORE
9032 || recog_memoized (next_insn)
9033 == CODE_FOR_bundle_selector)
9034 && GET_CODE (PATTERN (next_insn)) != USE
9035 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9038 end_bundle = next_insn == NULL_RTX
9039 || next_insn == tail
9040 || (INSN_P (next_insn)
9041 && recog_memoized (next_insn)
9042 == CODE_FOR_bundle_selector);
9043 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9044 && !start_bundle && !end_bundle
9046 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9047 && asm_noperands (PATTERN (next_insn)) < 0)
9050 start_bundle = false;
9054 gcc_assert (num == 0);
9058 free (index_to_bundle_states);
9059 finish_bundle_state_table ();
9061 dfa_clean_insn_cache ();
9064 /* The following function is called at the end of scheduling BB or
9065 EBB. After reload, it inserts stop bits and does insn bundling. */
9068 ia64_sched_finish (FILE *dump, int sched_verbose)
9071 fprintf (dump, "// Finishing schedule.\n");
9072 if (!reload_completed)
9074 if (reload_completed)
9076 final_emit_insn_group_barriers (dump);
9077 bundling (dump, sched_verbose, current_sched_info->prev_head,
9078 current_sched_info->next_tail);
9079 if (sched_verbose && dump)
9080 fprintf (dump, "// finishing %d-%d\n",
9081 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9082 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9088 /* The following function inserts stop bits in scheduled BB or EBB. */
9091 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9094 int need_barrier_p = 0;
9095 int seen_good_insn = 0;
9097 init_insn_group_barriers ();
9099 for (insn = NEXT_INSN (current_sched_info->prev_head);
9100 insn != current_sched_info->next_tail;
9101 insn = NEXT_INSN (insn))
9103 if (GET_CODE (insn) == BARRIER)
9105 rtx last = prev_active_insn (insn);
9109 if (GET_CODE (last) == JUMP_INSN
9110 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9111 last = prev_active_insn (last);
9112 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9113 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9115 init_insn_group_barriers ();
9119 else if (NONDEBUG_INSN_P (insn))
9121 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9123 init_insn_group_barriers ();
9127 else if (need_barrier_p || group_barrier_needed (insn)
9128 || (mflag_sched_stop_bits_after_every_cycle
9129 && GET_MODE (insn) == TImode
9132 if (TARGET_EARLY_STOP_BITS)
9137 last != current_sched_info->prev_head;
9138 last = PREV_INSN (last))
9139 if (INSN_P (last) && GET_MODE (last) == TImode
9140 && stops_p [INSN_UID (last)])
9142 if (last == current_sched_info->prev_head)
9144 last = prev_active_insn (last);
9146 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9147 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9149 init_insn_group_barriers ();
9150 for (last = NEXT_INSN (last);
9152 last = NEXT_INSN (last))
9155 group_barrier_needed (last);
9156 if (recog_memoized (last) >= 0
9157 && important_for_bundling_p (last))
9163 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9165 init_insn_group_barriers ();
9168 group_barrier_needed (insn);
9169 if (recog_memoized (insn) >= 0
9170 && important_for_bundling_p (insn))
9173 else if (recog_memoized (insn) >= 0
9174 && important_for_bundling_p (insn))
9176 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9177 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9178 || asm_noperands (PATTERN (insn)) >= 0);
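/* Editorial note: gen_insn_group_barrier (GEN_INT (3)) emits the insn
   that prints as the stop-bit marker ";;".  An illustrative
   (hypothetical) fragment of the resulting assembly:

	add r14 = r32, r33
	;;			// stop bit: the next group may read r14
	ld8 r15 = [r14]
*/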
9185 /* The following function returns the lookahead depth used by the
9186 first-cycle multipass insn scheduling; we search deeper after
9187 reload. */
9189 ia64_first_cycle_multipass_dfa_lookahead (void)
9191 return (reload_completed ? 6 : 4);
9194 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9197 ia64_init_dfa_pre_cycle_insn (void)
9199 if (temp_dfa_state == NULL)
9201 dfa_state_size = state_size ();
9202 temp_dfa_state = xmalloc (dfa_state_size);
9203 prev_cycle_state = xmalloc (dfa_state_size);
9205 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9206 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9207 recog_memoized (dfa_pre_cycle_insn);
9208 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9209 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9210 recog_memoized (dfa_stop_insn);
9213 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9214 used by the DFA insn scheduler. */
9217 ia64_dfa_pre_cycle_insn (void)
9219 return dfa_pre_cycle_insn;
9222 /* The following function returns TRUE if PRODUCER (of type ilog or
9223 ld) produces the address for CONSUMER (of type st or stf). */
9226 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9230 gcc_assert (producer && consumer);
9231 dest = ia64_single_set (producer);
9233 reg = SET_DEST (dest);
9235 if (GET_CODE (reg) == SUBREG)
9236 reg = SUBREG_REG (reg);
9237 gcc_assert (GET_CODE (reg) == REG);
9239 dest = ia64_single_set (consumer);
9241 mem = SET_DEST (dest);
9242 gcc_assert (mem && GET_CODE (mem) == MEM);
9243 return reg_mentioned_p (reg, mem);
9246 /* The following function returns TRUE if PRODUCER (of type ilog or
9247 ld) produces the address for CONSUMER (of type ld or fld). */
9250 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9252 rtx dest, src, reg, mem;
9254 gcc_assert (producer && consumer);
9255 dest = ia64_single_set (producer);
9257 reg = SET_DEST (dest);
9259 if (GET_CODE (reg) == SUBREG)
9260 reg = SUBREG_REG (reg);
9261 gcc_assert (GET_CODE (reg) == REG);
9263 src = ia64_single_set (consumer);
9265 mem = SET_SRC (src);
9268 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9269 mem = XVECEXP (mem, 0, 0);
9270 else if (GET_CODE (mem) == IF_THEN_ELSE)
9271 /* ??? Is this bypass necessary for ld.c? */
9273 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9274 mem = XEXP (mem, 1);
9277 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9278 mem = XEXP (mem, 0);
9280 if (GET_CODE (mem) == UNSPEC)
9282 int c = XINT (mem, 1);
9284 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9285 || c == UNSPEC_LDSA);
9286 mem = XVECEXP (mem, 0, 0);
9289 /* Note that LO_SUM is used for GOT loads. */
9290 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9292 return reg_mentioned_p (reg, mem);
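/* Editorial note: an illustrative (hypothetical) instance of the
   address bypass tested above -- the producer computes the register
   that the consumer's memory operand mentions:

	adds r14 = 8, r32	// producer, class ilog
	ld8  r15 = [r14]	// consumer: r14 appears in the MEM
*/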
9295 /* The following function returns TRUE if INSN produces the address
9296 for a load/store insn. We place such insns into an M slot because
9297 that decreases their latency. */
9300 ia64_produce_address_p (rtx insn)
9306 /* Emit pseudo-ops for the assembler to describe predicate relations.
9307 At present this assumes that we only consider predicate pairs to
9308 be mutex, and that the assembler can deduce proper values from
9309 straight-line code. */
9312 emit_predicate_relation_info (void)
9316 FOR_EACH_BB_REVERSE (bb)
9319 rtx head = BB_HEAD (bb);
9321 /* We only need such notes at code labels. */
9322 if (GET_CODE (head) != CODE_LABEL)
9324 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9325 head = NEXT_INSN (head);
9327 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9328 grabbing the entire block of predicate registers. */
9329 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9330 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9332 rtx p = gen_rtx_REG (BImode, r);
9333 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9334 if (head == BB_END (bb))
9340 /* Look for conditional calls that do not return, and protect predicate
9341 relations around them. Otherwise the assembler will assume the call
9342 returns, and complain about uses of call-clobbered predicates after
9343 the call. */
9344 FOR_EACH_BB_REVERSE (bb)
9346 rtx insn = BB_HEAD (bb);
9350 if (GET_CODE (insn) == CALL_INSN
9351 && GET_CODE (PATTERN (insn)) == COND_EXEC
9352 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9354 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9355 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9356 if (BB_HEAD (bb) == insn)
9358 if (BB_END (bb) == insn)
9362 if (insn == BB_END (bb))
9364 insn = NEXT_INSN (insn);
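/* Editorial note: the pred_rel_mutex insns emitted above print as
   assembler pseudo-ops of the form (operands illustrative only):

	.pred.rel "mutex", p6, p7

   and the safe_across_calls_* insns likewise print
   ".pred.safe_across_calls" directives bracketing the noreturn call.  */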
9369 /* Perform machine dependent operations on the rtl chain INSNS. */
9374 /* We are freeing block_for_insn in the toplev to keep compatibility
9375 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9376 compute_bb_for_insn ();
9378 /* If optimizing, we'll have split before scheduling. */
9382 if (optimize && ia64_flag_schedule_insns2
9383 && dbg_cnt (ia64_sched2))
9385 timevar_push (TV_SCHED2);
9386 ia64_final_schedule = 1;
9388 initiate_bundle_states ();
9389 ia64_nop = make_insn_raw (gen_nop ());
9390 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9391 recog_memoized (ia64_nop);
9392 clocks_length = get_max_uid () + 1;
9393 stops_p = XCNEWVEC (char, clocks_length);
9395 if (ia64_tune == PROCESSOR_ITANIUM2)
9397 pos_1 = get_cpu_unit_code ("2_1");
9398 pos_2 = get_cpu_unit_code ("2_2");
9399 pos_3 = get_cpu_unit_code ("2_3");
9400 pos_4 = get_cpu_unit_code ("2_4");
9401 pos_5 = get_cpu_unit_code ("2_5");
9402 pos_6 = get_cpu_unit_code ("2_6");
9403 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9404 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9405 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9406 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9407 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9408 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9409 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9410 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9411 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9412 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9413 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9414 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9415 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9416 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9417 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9418 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9419 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9420 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9421 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9422 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9426 pos_1 = get_cpu_unit_code ("1_1");
9427 pos_2 = get_cpu_unit_code ("1_2");
9428 pos_3 = get_cpu_unit_code ("1_3");
9429 pos_4 = get_cpu_unit_code ("1_4");
9430 pos_5 = get_cpu_unit_code ("1_5");
9431 pos_6 = get_cpu_unit_code ("1_6");
9432 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9433 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9434 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9435 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9436 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9437 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9438 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9439 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9440 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9441 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9442 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9443 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9444 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9445 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9446 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9447 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9448 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9449 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9450 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9451 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9454 if (flag_selective_scheduling2
9455 && !maybe_skip_selective_scheduling ())
9456 run_selective_scheduling ();
9460 /* Redo the alignment computation, as it might have gone wrong. */
9461 compute_alignments ();
9463 /* We cannot reuse this one because it has been corrupted by the
9465 finish_bundle_states ();
9468 emit_insn_group_barriers (dump_file);
9470 ia64_final_schedule = 0;
9471 timevar_pop (TV_SCHED2);
9474 emit_all_insn_group_barriers (dump_file);
9478 /* A call must not be the last instruction in a function, so that the
9479 return address remains within the function and unwinding works
9480 properly. Note that IA-64 differs from dwarf2 on this point. */
9481 if (ia64_except_unwind_info () == UI_TARGET)
9486 insn = get_last_insn ();
9487 if (! INSN_P (insn))
9488 insn = prev_active_insn (insn);
9491 /* Skip over insns that expand to nothing. */
9492 while (GET_CODE (insn) == INSN
9493 && get_attr_empty (insn) == EMPTY_YES)
9495 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9496 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9498 insn = prev_active_insn (insn);
9500 if (GET_CODE (insn) == CALL_INSN)
9503 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9504 emit_insn (gen_break_f ());
9505 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9510 emit_predicate_relation_info ();
9512 if (ia64_flag_var_tracking)
9514 timevar_push (TV_VAR_TRACKING);
9515 variable_tracking_main ();
9516 timevar_pop (TV_VAR_TRACKING);
9518 df_finish_pass (false);
9521 /* Return true if REGNO is used by the epilogue. */
9524 ia64_epilogue_uses (int regno)
9529 /* With a call to a function in another module, we will write a new
9530 value to "gp". After returning from such a call, we need to make
9531 sure the function restores the original gp-value, even if the
9532 function itself does not use the gp anymore. */
9533 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9535 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9536 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9537 /* For functions defined with the syscall_linkage attribute, all
9538 input registers are marked as live at all function exits. This
9539 prevents the register allocator from using the input registers,
9540 which in turn makes it possible to restart a system call after
9541 an interrupt without having to save/restore the input registers.
9542 This also prevents kernel data from leaking to application code. */
9543 return lookup_attribute ("syscall_linkage",
9544 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9547 /* Conditional return patterns can't represent the use of `b0' as
9548 the return address, so we force the value live this way. */
9552 /* Likewise for ar.pfs, which is used by br.ret. */
9560 /* Return true if REGNO is used by the frame unwinder. */
9563 ia64_eh_uses (int regno)
9567 if (! reload_completed)
9573 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9574 if (regno == current_frame_info.r[r]
9575 || regno == emitted_frame_related_regs[r])
9581 /* Return true if this goes in small data/bss. */
9583 /* ??? We could also support our own long data here, generating
9584 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but
9585 should make the code faster because there is one less load. This also
9586 covers incomplete types which can't go in sdata/sbss. */
9589 ia64_in_small_data_p (const_tree exp)
9591 if (TARGET_NO_SDATA)
9594 /* We want to merge strings, so we never consider them small data. */
9595 if (TREE_CODE (exp) == STRING_CST)
9598 /* Functions are never small data. */
9599 if (TREE_CODE (exp) == FUNCTION_DECL)
9602 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9604 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9606 if (strcmp (section, ".sdata") == 0
9607 || strncmp (section, ".sdata.", 7) == 0
9608 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9609 || strcmp (section, ".sbss") == 0
9610 || strncmp (section, ".sbss.", 6) == 0
9611 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9616 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9618 /* If this is an incomplete type with size 0, then we can't put it
9619 in sdata because it might be too big when completed. */
9620 if (size > 0 && size <= ia64_section_threshold)
9627 /* Output assembly directives for prologue regions. */
9629 /* True if the basic block being processed is the last one in the function. */
9631 static bool last_block;
9633 /* True if we need a copy_state command at the start of the next block. */
9635 static bool need_copy_state;
9637 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9638 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9639 #endif
9641 /* Emit a debugging label after a call-frame-related insn. We'd
9642 rather output the label right away, but we'd have to output it
9643 after, not before, the instruction, and the instruction has not
9644 been output yet. So we emit the label after the insn, delete it to
9645 avoid introducing basic blocks, and mark it as preserved, such that
9646 it is still output, given that it is referenced in debug info. */
9649 ia64_emit_deleted_label_after_insn (rtx insn)
9651 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9652 rtx lb = gen_label_rtx ();
9653 rtx label_insn = emit_label_after (lb, insn);
9655 LABEL_PRESERVE_P (lb) = 1;
9657 delete_insn (label_insn);
9659 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9661 return xstrdup (label);
9664 /* Define the CFA after INSN with the steady-state definition. */
9667 ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9669 rtx fp = frame_pointer_needed
9670 ? hard_frame_pointer_rtx
9671 : stack_pointer_rtx;
9672 const char *label = ia64_emit_deleted_label_after_insn (insn);
9679 ia64_initial_elimination_offset
9680 (REGNO (arg_pointer_rtx), REGNO (fp))
9681 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9684 /* All we need to do here is avoid a crash in the generic dwarf2
9685 processing. The real CFA definition is set up above. */
9688 ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
9689 rtx ARG_UNUSED (pattern),
9692 gcc_assert (index == UNSPECV_ALLOC);
9695 /* The generic dwarf2 frame debug info generator does not define a
9696 separate region for the very end of the epilogue, so refrain from
9697 doing so in the IA64-specific code as well. */
9699 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9701 /* The function emits unwind directives for the start of an epilogue. */
9704 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9706 /* If this isn't the last block of the function, then we need to label the
9707 current state, and copy it back in at the start of the next block. */
9712 fprintf (asm_out_file, "\t.label_state %d\n",
9713 ++cfun->machine->state_num);
9714 need_copy_state = true;
9718 fprintf (asm_out_file, "\t.restore sp\n");
9719 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9720 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9721 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9724 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9727 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9728 bool unwind, bool frame)
9730 rtx dest = SET_DEST (pat);
9731 rtx src = SET_SRC (pat);
9733 if (dest == stack_pointer_rtx)
9735 if (GET_CODE (src) == PLUS)
9737 rtx op0 = XEXP (src, 0);
9738 rtx op1 = XEXP (src, 1);
9740 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9742 if (INTVAL (op1) < 0)
9744 gcc_assert (!frame_pointer_needed);
9746 fprintf (asm_out_file,
9747 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9749 ia64_dwarf2out_def_steady_cfa (insn, frame);
9752 process_epilogue (asm_out_file, insn, unwind, frame);
9756 gcc_assert (src == hard_frame_pointer_rtx);
9757 process_epilogue (asm_out_file, insn, unwind, frame);
9760 else if (dest == hard_frame_pointer_rtx)
9762 gcc_assert (src == stack_pointer_rtx);
9763 gcc_assert (frame_pointer_needed);
9766 fprintf (asm_out_file, "\t.vframe r%d\n",
9767 ia64_dbx_register_number (REGNO (dest)));
9768 ia64_dwarf2out_def_steady_cfa (insn, frame);
9774 /* This function processes a SET pattern for REG_CFA_REGISTER. */
9777 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9779 rtx dest = SET_DEST (pat);
9780 rtx src = SET_SRC (pat);
9782 int dest_regno = REGNO (dest);
9783 int src_regno = REGNO (src);
9788 /* Saving return address pointer. */
9789 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9791 fprintf (asm_out_file, "\t.save rp, r%d\n",
9792 ia64_dbx_register_number (dest_regno));
9796 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9798 fprintf (asm_out_file, "\t.save pr, r%d\n",
9799 ia64_dbx_register_number (dest_regno));
9802 case AR_UNAT_REGNUM:
9803 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9805 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9806 ia64_dbx_register_number (dest_regno));
9810 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9812 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9813 ia64_dbx_register_number (dest_regno));
9817 /* Everything else should indicate being stored to memory. */
9822 /* This function processes a SET pattern for REG_CFA_OFFSET. */
9825 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9827 rtx dest = SET_DEST (pat);
9828 rtx src = SET_SRC (pat);
9829 int src_regno = REGNO (src);
9834 gcc_assert (MEM_P (dest));
9835 if (GET_CODE (XEXP (dest, 0)) == REG)
9837 base = XEXP (dest, 0);
9842 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9843 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9844 base = XEXP (XEXP (dest, 0), 0);
9845 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9848 if (base == hard_frame_pointer_rtx)
9850 saveop = ".savepsp";
9855 gcc_assert (base == stack_pointer_rtx);
9859 src_regno = REGNO (src);
9863 gcc_assert (!current_frame_info.r[reg_save_b0]);
9865 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9870 gcc_assert (!current_frame_info.r[reg_save_pr]);
9872 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9877 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9879 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9884 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9886 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9890 case AR_UNAT_REGNUM:
9891 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9893 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9902 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9903 1 << (src_regno - GR_REG (4)));
9912 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9913 1 << (src_regno - BR_REG (1)));
9921 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9922 1 << (src_regno - FR_REG (2)));
9925 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9926 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9927 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9928 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9930 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9931 1 << (src_regno - FR_REG (12)));
9935 /* ??? For some reason we mark other general registers, even those
9936 we can't represent in the unwind info. Ignore them. */
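/* Editorial note: taken together, the process_cfa_* routines above and
   the alloc handling below emit the target unwind directives.  A
   typical prologue might produce a sequence like this (illustrative
   only; the register numbers are hypothetical):

	.prologue
	.save ar.pfs, r34
	.fframe 16
	.save rp, r33
	.body
*/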
9941 /* This function looks at a single insn and emits any directives
9942 required to unwind this insn. */
9945 ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
9947 bool unwind = ia64_except_unwind_info () == UI_TARGET;
9948 bool frame = dwarf2out_do_frame ();
9952 if (!unwind && !frame)
9955 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9957 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9959 /* Restore unwind state from immediately before the epilogue. */
9960 if (need_copy_state)
9964 fprintf (asm_out_file, "\t.body\n");
9965 fprintf (asm_out_file, "\t.copy_state %d\n",
9966 cfun->machine->state_num);
9968 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9969 ia64_dwarf2out_def_steady_cfa (insn, frame);
9970 need_copy_state = false;
9974 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9977 /* Look for the ALLOC insn. */
9978 if (INSN_CODE (insn) == CODE_FOR_alloc)
9980 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
9981 int dest_regno = REGNO (dest);
9983 /* If this is the final destination for ar.pfs, then this must
9984 be the alloc in the prologue. */
9985 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9988 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9989 ia64_dbx_register_number (dest_regno));
9993 /* This must be an alloc before a sibcall. We must drop the
9994 old frame info. The easiest way to drop the old frame
9995 info is to ensure we had a ".restore sp" directive
9996 followed by a new prologue. If the procedure doesn't
9997 have a memory-stack frame, we'll issue a dummy ".restore
9998 sp" now. */
9999 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10000 /* If we haven't done process_epilogue () yet, do it now. */
10001 process_epilogue (asm_out_file, insn, unwind, frame);
10003 fprintf (asm_out_file, "\t.prologue\n");
10008 handled_one = false;
10009 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10010 switch (REG_NOTE_KIND (note))
10012 case REG_CFA_ADJUST_CFA:
10013 pat = XEXP (note, 0);
10015 pat = PATTERN (insn);
10016 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10017 handled_one = true;
10020 case REG_CFA_OFFSET:
10021 pat = XEXP (note, 0);
10023 pat = PATTERN (insn);
10024 process_cfa_offset (asm_out_file, pat, unwind);
10025 handled_one = true;
10028 case REG_CFA_REGISTER:
10029 pat = XEXP (note, 0);
10031 pat = PATTERN (insn);
10032 process_cfa_register (asm_out_file, pat, unwind);
10033 handled_one = true;
10036 case REG_FRAME_RELATED_EXPR:
10037 case REG_CFA_DEF_CFA:
10038 case REG_CFA_EXPRESSION:
10039 case REG_CFA_RESTORE:
10040 case REG_CFA_SET_VDRAP:
10041 /* Not used in the ia64 port. */
10042 gcc_unreachable ();
10045 /* Not a frame-related note. */
10049 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10050 explicit action to take. No guessing required. */
10051 gcc_assert (handled_one);
10054 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10057 ia64_asm_emit_except_personality (rtx personality)
10059 fputs ("\t.personality\t", asm_out_file);
10060 output_addr_const (asm_out_file, personality);
10061 fputc ('\n', asm_out_file);
10064 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10067 ia64_asm_init_sections (void)
10069 exception_section = get_unnamed_section (0, output_section_asm_op,
10073 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10075 static enum unwind_info_type
10076 ia64_debug_unwind_info (void)
10081 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
10083 static enum unwind_info_type
10084 ia64_except_unwind_info (void)
10086 /* Honor the --enable-sjlj-exceptions configure switch. */
10087 #ifdef CONFIG_UNWIND_EXCEPTIONS
10088 if (CONFIG_UNWIND_EXCEPTIONS)
10092 /* For simplicity elsewhere in this file, indicate that all unwind
10093 info is disabled if we're not emitting unwind tables. */
10094 if (!flag_exceptions && !flag_unwind_tables)
10103 IA64_BUILTIN_COPYSIGNQ,
10104 IA64_BUILTIN_FABSQ,
10105 IA64_BUILTIN_FLUSHRS,
10107 IA64_BUILTIN_HUGE_VALQ,
10111 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10114 ia64_init_builtins (void)
10120 /* The __fpreg type. */
10121 fpreg_type = make_node (REAL_TYPE);
10122 TYPE_PRECISION (fpreg_type) = 82;
10123 layout_type (fpreg_type);
10124 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10126 /* The __float80 type. */
10127 float80_type = make_node (REAL_TYPE);
10128 TYPE_PRECISION (float80_type) = 80;
10129 layout_type (float80_type);
10130 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10132 /* The __float128 type. */
10136 tree float128_type = make_node (REAL_TYPE);
10138 TYPE_PRECISION (float128_type) = 128;
10139 layout_type (float128_type);
10140 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10142 /* TFmode support builtins. */
10143 ftype = build_function_type (float128_type, void_list_node);
10144 decl = add_builtin_function ("__builtin_infq", ftype,
10145 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10147 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10149 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10150 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10152 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10154 ftype = build_function_type_list (float128_type,
10157 decl = add_builtin_function ("__builtin_fabsq", ftype,
10158 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10159 "__fabstf2", NULL_TREE);
10160 TREE_READONLY (decl) = 1;
10161 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10163 ftype = build_function_type_list (float128_type,
10167 decl = add_builtin_function ("__builtin_copysignq", ftype,
10168 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10169 "__copysigntf3", NULL_TREE);
10170 TREE_READONLY (decl) = 1;
10171 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
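/* Editorial note: an illustrative (hypothetical) use of the TFmode
   builtins registered above, as seen from user code; x and y stand
   for arbitrary __float128 values:

	__float128 inf = __builtin_infq ();
	__float128 mag = __builtin_fabsq (x);
	__float128 s   = __builtin_copysignq (mag, y);
*/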
10174 /* Under HPUX, this is a synonym for "long double". */
10175 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10178 /* Fwrite on VMS is non-standard. */
10179 if (TARGET_ABI_OPEN_VMS)
10181 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
10182 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
10185 #define def_builtin(name, type, code) \
10186 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10189 decl = def_builtin ("__builtin_ia64_bsp",
10190 build_function_type (ptr_type_node, void_list_node),
10192 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10194 decl = def_builtin ("__builtin_ia64_flushrs",
10195 build_function_type (void_type_node, void_list_node),
10196 IA64_BUILTIN_FLUSHRS);
10197 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10203 if (built_in_decls [BUILT_IN_FINITE])
10204 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
10206 if (built_in_decls [BUILT_IN_FINITEF])
10207 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
10209 if (built_in_decls [BUILT_IN_FINITEL])
10210 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
10216 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10217 enum machine_mode mode ATTRIBUTE_UNUSED,
10218 int ignore ATTRIBUTE_UNUSED)
10220 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10221 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10225 case IA64_BUILTIN_BSP:
10226 if (! target || ! register_operand (target, DImode))
10227 target = gen_reg_rtx (DImode);
10228 emit_insn (gen_bsp_value (target));
10229 #ifdef POINTERS_EXTEND_UNSIGNED
10230 target = convert_memory_address (ptr_mode, target);
10234 case IA64_BUILTIN_FLUSHRS:
10235 emit_insn (gen_flushrs ());
10238 case IA64_BUILTIN_INFQ:
10239 case IA64_BUILTIN_HUGE_VALQ:
10241 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10242 REAL_VALUE_TYPE inf;
10246 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10248 tmp = validize_mem (force_const_mem (target_mode, tmp));
10251 target = gen_reg_rtx (target_mode);
10253 emit_move_insn (target, tmp);
10257 case IA64_BUILTIN_FABSQ:
10258 case IA64_BUILTIN_COPYSIGNQ:
10259 return expand_call (exp, target, ignore);
10262 gcc_unreachable ();
10268 /* Return the ia64 builtin for CODE. */
10271 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10273 if (code >= IA64_BUILTIN_max)
10274 return error_mark_node;
10276 return ia64_builtins[code];
10279 /* On HP-UX IA64, aggregate parameters are passed in the most
10280 significant bits of the stack slot. */
10283 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10285 /* Exception to normal case for structures/unions/etc. */
10287 if (type && AGGREGATE_TYPE_P (type)
10288 && int_size_in_bytes (type) < UNITS_PER_WORD)
10291 /* Fall back to the default. */
10292 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
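/* Editorial note (illustrative): under this rule a 2-byte aggregate
   argument ends up in the most significant bytes of its 8-byte stack
   slot on HP-UX, rather than at the least significant end where the
   default padding rule would place it.  */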
10295 /* Emit text to declare externally defined variables and functions, because
10296 the Intel assembler does not support undefined externals. */
10299 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10301 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10302 set, in order to avoid putting out names that are never really
10303 used. */
10304 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10306 /* maybe_assemble_visibility will return 1 if the assembler
10307 visibility directive is output. */
10308 int need_visibility = ((*targetm.binds_local_p) (decl)
10309 && maybe_assemble_visibility (decl));
10311 #ifdef DO_CRTL_NAMES
10315 /* GNU as does not need anything here, but the HP linker does
10316 need something for external functions. */
10317 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10318 && TREE_CODE (decl) == FUNCTION_DECL)
10319 (*targetm.asm_out.globalize_decl_name) (file, decl);
10320 else if (need_visibility && !TARGET_GNU_AS)
10321 (*targetm.asm_out.globalize_label) (file, name);
10325 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10326 modes of word_mode and larger. Rename the TFmode libfuncs using the
10327 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10328 backward compatibility. */
10331 ia64_init_libfuncs (void)
10333 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10334 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10335 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10336 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10338 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10339 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10340 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10341 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10342 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10344 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10345 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10346 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10347 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10348 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10349 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10351 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10352 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10353 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10354 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10355 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10357 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10358 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10359 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10360 /* HP-UX 11.23 libc does not have a function for unsigned
10361 SImode-to-TFmode conversion. */
10362 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10365 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10368 ia64_hpux_init_libfuncs (void)
10370 ia64_init_libfuncs ();
10372 /* The HP SI millicode division and mod functions expect DI arguments.
10373 By turning them off completely we avoid using both libgcc and the
10374 non-standard millicode routines and use the HP DI millicode routines
10375 instead. */
10377 set_optab_libfunc (sdiv_optab, SImode, 0);
10378 set_optab_libfunc (udiv_optab, SImode, 0);
10379 set_optab_libfunc (smod_optab, SImode, 0);
10380 set_optab_libfunc (umod_optab, SImode, 0);
10382 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10383 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10384 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10385 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10387 /* HP-UX libc has TF min/max/abs routines in it. */
10388 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10389 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10390 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10392 /* ia64_expand_compare uses this. */
10393 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10395 /* These should never be used. */
10396 set_optab_libfunc (eq_optab, TFmode, 0);
10397 set_optab_libfunc (ne_optab, TFmode, 0);
10398 set_optab_libfunc (gt_optab, TFmode, 0);
10399 set_optab_libfunc (ge_optab, TFmode, 0);
10400 set_optab_libfunc (lt_optab, TFmode, 0);
10401 set_optab_libfunc (le_optab, TFmode, 0);
10404 /* Rename the division and modulus functions in VMS. */
10407 ia64_vms_init_libfuncs (void)
10409 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10410 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10411 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10412 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10413 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10414 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10415 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10416 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10417 abort_libfunc = init_one_libfunc ("decc$abort");
10418 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10419 #ifdef MEM_LIBFUNCS_INIT
10424 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10425 the HPUX conventions. */
10428 ia64_sysv4_init_libfuncs (void)
10430 ia64_init_libfuncs ();
10432 /* These functions are not part of the HPUX TFmode interface. We
10433 use them instead of _U_Qfcmp, which doesn't work the way we
10434 expect. */
10435 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10436 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10437 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10438 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10439 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10440 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10442 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10443 glibc doesn't have them. */
10449 ia64_soft_fp_init_libfuncs (void)
10454 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10456 return (mode == SImode || mode == DImode);
10459 /* For HPUX, it is illegal to have relocations in shared segments. */
10462 ia64_hpux_reloc_rw_mask (void)
10467 /* For others, relax this so that relocations to local data go in
10468 read-only segments, but we still cannot allow global relocations
10469 in read-only segments. */
10472 ia64_reloc_rw_mask (void)
10474 return flag_pic ? 3 : 2;
10477 /* Return the section to use for X. The only special thing we do here
10478 is to honor small data. */
10481 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10482 unsigned HOST_WIDE_INT align)
10484 if (GET_MODE_SIZE (mode) > 0
10485 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10486 && !TARGET_NO_SDATA)
10487 return sdata_section;
10489 return default_elf_select_rtx_section (mode, x, align);
10492 static unsigned int
10493 ia64_section_type_flags (tree decl, const char *name, int reloc)
10495 unsigned int flags = 0;
10497 if (strcmp (name, ".sdata") == 0
10498 || strncmp (name, ".sdata.", 7) == 0
10499 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10500 || strncmp (name, ".sdata2.", 8) == 0
10501 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10502 || strcmp (name, ".sbss") == 0
10503 || strncmp (name, ".sbss.", 6) == 0
10504 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10505 flags = SECTION_SMALL;
10507 #if TARGET_ABI_OPEN_VMS
10508 if (decl && DECL_ATTRIBUTES (decl)
10509 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10510 flags |= SECTION_VMS_OVERLAY;
10513 flags |= default_section_type_flags (decl, name, reloc);
10517 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10518 structure type and the address of that type should be passed
10519 in out0, rather than in r8. */
10522 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10524 tree ret_type = TREE_TYPE (fntype);
10526 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10527 as the structure return address parameter, if the return value
10528 type has a non-trivial copy constructor or destructor. It is not
10529 clear if this same convention should be used for other
10530 programming languages. Until G++ 3.4, we incorrectly used r8 for
10531 these return values. */
10532 return (abi_version_at_least (2)
10534 && TYPE_MODE (ret_type) == BLKmode
10535 && TREE_ADDRESSABLE (ret_type)
10536 && strcmp (lang_hooks.name, "GNU C++") == 0);
10539 /* Output the assembler code for a thunk function. THUNK_DECL is the
10540 declaration for the thunk function itself, FUNCTION is the decl for
10541 the target function. DELTA is an immediate constant offset to be
10542 added to THIS. If VCALL_OFFSET is nonzero, the word at
10543 *(*this + vcall_offset) should be added to THIS. */
10546 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10547 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10550 rtx this_rtx, insn, funexp;
10551 unsigned int this_parmno;
10552 unsigned int this_regno;
10555 reload_completed = 1;
10556 epilogue_completed = 1;
10558 /* Set things up as ia64_expand_prologue might. */
10559 last_scratch_gr_reg = 15;
10561 memset (&current_frame_info, 0, sizeof (current_frame_info));
10562 current_frame_info.spill_cfa_off = -16;
10563 current_frame_info.n_input_regs = 1;
10564 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10566 /* Mark the end of the (empty) prologue. */
10567 emit_note (NOTE_INSN_PROLOGUE_END);
10569 /* Figure out whether "this" will be the first parameter (the
10570 typical case) or the second parameter (as happens when the
10571 virtual function returns certain class objects). */
10573 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10575 this_regno = IN_REG (this_parmno);
10576 if (!TARGET_REG_NAMES)
10577 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10579 this_rtx = gen_rtx_REG (Pmode, this_regno);
10581 /* Apply the constant offset, if required. */
10582 delta_rtx = GEN_INT (delta);
10585 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10586 REG_POINTER (tmp) = 1;
10587 if (delta && satisfies_constraint_I (delta_rtx))
10589 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10593 emit_insn (gen_ptr_extend (this_rtx, tmp));
10597 if (!satisfies_constraint_I (delta_rtx))
10599 rtx tmp = gen_rtx_REG (Pmode, 2);
10600 emit_move_insn (tmp, delta_rtx);
10603 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10606 /* Apply the offset from the vtable, if required. */
10609 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10610 rtx tmp = gen_rtx_REG (Pmode, 2);
10614 rtx t = gen_rtx_REG (ptr_mode, 2);
10615 REG_POINTER (t) = 1;
10616 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10617 if (satisfies_constraint_I (vcall_offset_rtx))
10619 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10623 emit_insn (gen_ptr_extend (tmp, t));
10626 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10630 if (!satisfies_constraint_J (vcall_offset_rtx))
10632 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10633 emit_move_insn (tmp2, vcall_offset_rtx);
10634 vcall_offset_rtx = tmp2;
10636 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10640 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10642 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10644 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10647 /* Generate a tail call to the target function. */
10648 if (! TREE_USED (function))
10650 assemble_external (function);
10651 TREE_USED (function) = 1;
10653 funexp = XEXP (DECL_RTL (function), 0);
10654 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10655 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10656 insn = get_last_insn ();
10657 SIBLING_CALL_P (insn) = 1;
10659 /* Code generation for calls relies on splitting. */
10660 reload_completed = 1;
10661 epilogue_completed = 1;
10662 try_split (PATTERN (insn), insn, 0);
10666 /* Run just enough of rest_of_compilation to get the insns emitted.
10667 There's not really enough bulk here to make other passes such as
10668 instruction scheduling worthwhile. Note that use_thunk calls
10669 assemble_start_function and assemble_end_function. */
10671 insn_locators_alloc ();
10672 emit_all_insn_group_barriers (NULL);
10673 insn = get_insns ();
10674 shorten_branches (insn);
10675 final_start_function (insn, file, 1);
10676 final (insn, file, 1);
10677 final_end_function ();
10679 reload_completed = 0;
10680 epilogue_completed = 0;
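/* Editorial sketch, not part of the build: the thunk generated above
   is semantically equivalent to this pseudo-C, where DELTA, VCALL_OFFSET
   and FUNCTION are the quantities described in the comment before the
   function.  */
#if 0
void *
thunk (char *this_ptr /* , ... the remaining arguments ... */)
{
  this_ptr += DELTA;
  if (VCALL_OFFSET != 0)
    /* Fetch the vtable pointer stored at *this, then add the
       adjustment found at VCALL_OFFSET inside the vtable.  */
    this_ptr += *(long *) (*(char **) this_ptr + VCALL_OFFSET);
  /* Finally, tail-call FUNCTION with the adjusted pointer.  */
  return FUNCTION (this_ptr /* , ... */);
}
#endif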
10683 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10686 ia64_struct_value_rtx (tree fntype,
10687 int incoming ATTRIBUTE_UNUSED)
10689 if (TARGET_ABI_OPEN_VMS ||
10690 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10692 return gen_rtx_REG (Pmode, GR_REG (8));
10696 ia64_scalar_mode_supported_p (enum machine_mode mode)
10722 ia64_vector_mode_supported_p (enum machine_mode mode)
10739 /* Implement the FUNCTION_PROFILER macro. */
10742 ia64_output_function_profiler (FILE *file, int labelno)
10744 bool indirect_call;
10746 /* If the function needs a static chain and the static chain
10747 register is r15, we use an indirect call so as to bypass
10748 the PLT stub in case the executable is dynamically linked,
10749 because the stub clobbers r15 as per 5.3.6 of the psABI.
10750 We don't need to do that in non-canonical PIC mode. */
10752 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10754 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10755 indirect_call = true;
10758 indirect_call = false;
10761 fputs ("\t.prologue 4, r40\n", file);
10763 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10764 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10766 if (NO_PROFILE_COUNTERS)
10767 fputs ("\tmov out3 = r0\n", file);
10771 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10773 if (TARGET_AUTO_PIC)
10774 fputs ("\tmovl out3 = @gprel(", file);
10776 fputs ("\taddl out3 = @ltoff(", file);
10777 assemble_name (file, buf);
10778 if (TARGET_AUTO_PIC)
10779 fputs (")\n", file);
10781 fputs ("), r1\n", file);
10785 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10786 fputs ("\t;;\n", file);
10788 fputs ("\t.save rp, r42\n", file);
10789 fputs ("\tmov out2 = b0\n", file);
10791 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10792 fputs ("\t.body\n", file);
10793 fputs ("\tmov out1 = r1\n", file);
10796 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10797 fputs ("\tmov b6 = r16\n", file);
10798 fputs ("\tld8 r1 = [r14]\n", file);
10799 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10802 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10805 static GTY(()) rtx mcount_func_rtx;
10807 gen_mcount_func_rtx (void)
10809 if (!mcount_func_rtx)
10810 mcount_func_rtx = init_one_libfunc ("_mcount");
10811 return mcount_func_rtx;
10815 ia64_profile_hook (int labelno)
10819 if (NO_PROFILE_COUNTERS)
10820 label = const0_rtx;
10824 const char *label_name;
10825 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10826 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10827 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10828 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10830 ip = gen_reg_rtx (Pmode);
10831 emit_insn (gen_ip_value (ip));
10832 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10834 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10839 /* Return the mangling of TYPE if it is an extended fundamental type. */
10841 static const char *
10842 ia64_mangle_type (const_tree type)
10844 type = TYPE_MAIN_VARIANT (type);
10846 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10847 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10850 /* On HP-UX, "long double" is mangled as "e", so __float128 is
10851 mangled as "e". */
10852 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10854 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10855 an extended mangling. Elsewhere, "e" is available since long
10856 double is 80 bits. */
10857 if (TYPE_MODE (type) == XFmode)
10858 return TARGET_HPUX ? "u9__float80" : "e";
10859 if (TYPE_MODE (type) == RFmode)
10860 return "u7__fpreg";
10864 /* Return the diagnostic message string if conversion from FROMTYPE to
10865 TOTYPE is not allowed, NULL otherwise. */
10866 static const char *
10867 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10869 /* Reject nontrivial conversion to or from __fpreg. */
10870 if (TYPE_MODE (fromtype) == RFmode
10871 && TYPE_MODE (totype) != RFmode
10872 && TYPE_MODE (totype) != VOIDmode)
10873 return N_("invalid conversion from %<__fpreg%>");
10874 if (TYPE_MODE (totype) == RFmode
10875 && TYPE_MODE (fromtype) != RFmode)
10876 return N_("invalid conversion to %<__fpreg%>");
10880 /* Return the diagnostic message string if the unary operation OP is
10881 not permitted on TYPE, NULL otherwise. */
10882 static const char *
10883 ia64_invalid_unary_op (int op, const_tree type)
10885 /* Reject operations on __fpreg other than unary + or &. */
10886 if (TYPE_MODE (type) == RFmode
10887 && op != CONVERT_EXPR
10888 && op != ADDR_EXPR)
10889 return N_("invalid operation on %<__fpreg%>");

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
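
/* Illustrative note, not part of the original sources: together with the
   two hooks above this makes __fpreg essentially opaque at the source
   level; binary expressions such as r + r or r == s on __fpreg operands
   are always rejected, regardless of the operator.  */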

/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
static void
ia64_option_default_params (void)
{
  /* Let the scheduler form additional regions.  */
  set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS, 2);

  /* Set the default values for cache-related parameters.  */
  set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6);
  set_default_param_value (PARAM_L1_CACHE_LINE_SIZE, 32);

  set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST, 4);
}
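
/* Illustrative note, not part of the original sources: these calls only
   change the defaults, so a user can still override them, e.g.
   "gcc --param simultaneous-prefetches=8 --param l1-cache-line-size=128".  */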

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234, put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
				  tree name ATTRIBUTE_UNUSED,
				  tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
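
/* Illustrative usage, not part of the original sources:

       extern int foo (void) __attribute__ ((version_id ("1234")));

   causes references to foo to be assembled against the versioned
   HP-UX symbol "foo{1234}" via the .alias directive described above.  */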

/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
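
/* Illustrative example, not part of the original sources: with this hook
   the C front end accepts the literal suffixes 'q' and 'w', so that
   "__float128 x = 1.5q;" builds a TFmode constant and
   "__float80 y = 1.5w;" an XFmode one.  */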

static enum machine_mode
ia64_promote_function_mode (const_tree type,
			    enum machine_mode mode,
			    int *punsignedp,
			    const_tree funtype,
			    int for_return)
{
  /* Special processing is required for OpenVMS ...  */

  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode (type, mode, punsignedp, funtype,
					  for_return);

  /* The HP OpenVMS Calling Standard dated June 2004, which describes
     HP OpenVMS I64 Version 8.2EFT,
     chapter 4 "OpenVMS I64 Conventions"
     section 4.7 "Procedure Linkage"
     subsection 4.7.5.2 "Normal Register Parameters", says:

     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
     values passed in registers are zero-filled; signed integral values as
     well as unsigned 32-bit integral values are sign-extended to 64 bits.
     For all other types passed in the general registers, unused bits are
     undefined."  */

  if (!AGGREGATE_TYPE_P (type)
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    {
      if (mode == SImode)
	*punsignedp = 0;
      return DImode;
    }
  else
    return promote_mode (type, mode, punsignedp);
}
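
/* Illustrative note, not part of the original sources: on OpenVMS this
   widens e.g. a 'short' argument to DImode, and clears *punsignedp for
   an 'unsigned int' (SImode) argument so that it is sign-extended to
   64 bits, as required by the calling-standard text quoted above.  */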

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}
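
/* Illustrative note, not part of the original sources: the two helpers
   above lazily build DFmode constants 0.5 and 0.375 and cache them as
   GTY roots, so the machine-description expanders that consume them
   (presumably the software square-root sequences) do not rebuild the
   CONST_DOUBLEs on every expansion.  */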

/* Return the raw mode in which hard register REGNO is accessed;
   IA-64 floating-point registers are always accessed as XFmode.  */
static enum machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode (regno);
}

/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
			    enum node_frequency freq ATTRIBUTE_UNUSED,
			    bool startup ATTRIBUTE_UNUSED,
			    bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}

#include "gt-ia64.h"