1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3 2009 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "sched-int.h"
48 #include "target-def.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
59 #include "tm-constrs.h"
60 #include "sel-sched.h"
62 /* This is used for communication between ASM_OUTPUT_LABEL and
63 ASM_OUTPUT_LABELREF. */
64 int ia64_asm_output_label = 0;
66 /* Register names for ia64_expand_prologue. */
67 static const char * const ia64_reg_numbers[96] =
68 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
69 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
70 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
71 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
72 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
73 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
74 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
75 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
76 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
77 "r104","r105","r106","r107","r108","r109","r110","r111",
78 "r112","r113","r114","r115","r116","r117","r118","r119",
79 "r120","r121","r122","r123","r124","r125","r126","r127"};
81 /* ??? These strings could be shared with REGISTER_NAMES. */
82 static const char * const ia64_input_reg_names[8] =
83 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_local_reg_names[80] =
87 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
88 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
89 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
90 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
91 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
92 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
93 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
94 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
95 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
96 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
98 /* ??? These strings could be shared with REGISTER_NAMES. */
99 static const char * const ia64_output_reg_names[8] =
100 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
102 /* Which CPU we are scheduling for. */
103 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
105 /* Determines whether we run our final scheduling pass or not. We always
106 avoid the normal second scheduling pass. */
107 static int ia64_flag_schedule_insns2;
109 /* Determines whether we run variable tracking in machine dependent
110 reorganization. */
111 static int ia64_flag_var_tracking;
113 /* Variables which are this size or smaller are put in the sdata/sbss
114 sections. */
116 unsigned int ia64_section_threshold;
118 /* The following variable is used by the DFA insn scheduler. The value is
119 TRUE if we do insn bundling instead of insn scheduling. */
131 number_of_ia64_frame_regs
134 /* Structure to be filled in by ia64_compute_frame_size with register
135 save masks and offsets for the current function. */
137 struct ia64_frame_info
139 HOST_WIDE_INT total_size; /* size of the stack frame, not including
140 the caller's scratch area. */
141 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
142 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
143 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
144 HARD_REG_SET mask; /* mask of saved registers. */
145 unsigned int gr_used_mask; /* mask of registers in use as gr spill
146 registers or long-term scratches. */
147 int n_spilled; /* number of spilled registers. */
148 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
149 int n_input_regs; /* number of input registers used. */
150 int n_local_regs; /* number of local registers used. */
151 int n_output_regs; /* number of output registers used. */
152 int n_rotate_regs; /* number of rotating registers used. */
154 char need_regstk; /* true if a .regstk directive needed. */
155 char initialized; /* true if the data is finalized. */
158 /* Current frame information calculated by ia64_compute_frame_size. */
159 static struct ia64_frame_info current_frame_info;
160 /* The actual registers that are emitted. */
161 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
163 static int ia64_first_cycle_multipass_dfa_lookahead (void);
164 static void ia64_dependencies_evaluation_hook (rtx, rtx);
165 static void ia64_init_dfa_pre_cycle_insn (void);
166 static rtx ia64_dfa_pre_cycle_insn (void);
167 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
168 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
169 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
170 static void ia64_h_i_d_extended (void);
171 static void * ia64_alloc_sched_context (void);
172 static void ia64_init_sched_context (void *, bool);
173 static void ia64_set_sched_context (void *);
174 static void ia64_clear_sched_context (void *);
175 static void ia64_free_sched_context (void *);
176 static int ia64_mode_to_int (enum machine_mode);
177 static void ia64_set_sched_flags (spec_info_t);
178 static ds_t ia64_get_insn_spec_ds (rtx);
179 static ds_t ia64_get_insn_checked_ds (rtx);
180 static bool ia64_skip_rtx_p (const_rtx);
181 static int ia64_speculate_insn (rtx, ds_t, rtx *);
182 static bool ia64_needs_block_p (int);
183 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
184 static int ia64_spec_check_p (rtx);
185 static int ia64_spec_check_src_p (rtx);
186 static rtx gen_tls_get_addr (void);
187 static rtx gen_thread_pointer (void);
188 static int find_gr_spill (enum ia64_frame_regs, int);
189 static int next_scratch_gr_reg (void);
190 static void mark_reg_gr_used_mask (rtx, void *);
191 static void ia64_compute_frame_size (HOST_WIDE_INT);
192 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
193 static void finish_spill_pointers (void);
194 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
195 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
196 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
197 static rtx gen_movdi_x (rtx, rtx, rtx);
198 static rtx gen_fr_spill_x (rtx, rtx, rtx);
199 static rtx gen_fr_restore_x (rtx, rtx, rtx);
201 static enum machine_mode hfa_element_mode (const_tree, bool);
202 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
204 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
206 static bool ia64_function_ok_for_sibcall (tree, tree);
207 static bool ia64_return_in_memory (const_tree, const_tree);
208 static bool ia64_rtx_costs (rtx, int, int, int *, bool);
209 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
210 static void fix_range (const char *);
211 static bool ia64_handle_option (size_t, const char *, int);
212 static struct machine_function * ia64_init_machine_status (void);
213 static void emit_insn_group_barriers (FILE *);
214 static void emit_all_insn_group_barriers (FILE *);
215 static void final_emit_insn_group_barriers (FILE *);
216 static void emit_predicate_relation_info (void);
217 static void ia64_reorg (void);
218 static bool ia64_in_small_data_p (const_tree);
219 static void process_epilogue (FILE *, rtx, bool, bool);
220 static int process_set (FILE *, rtx, rtx, bool, bool);
222 static bool ia64_assemble_integer (rtx, unsigned int, int);
223 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
224 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
225 static void ia64_output_function_end_prologue (FILE *);
227 static int ia64_issue_rate (void);
228 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
229 static void ia64_sched_init (FILE *, int, int);
230 static void ia64_sched_init_global (FILE *, int, int);
231 static void ia64_sched_finish_global (FILE *, int);
232 static void ia64_sched_finish (FILE *, int);
233 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
234 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
235 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
236 static int ia64_variable_issue (FILE *, int, rtx, int);
238 static struct bundle_state *get_free_bundle_state (void);
239 static void free_bundle_state (struct bundle_state *);
240 static void initiate_bundle_states (void);
241 static void finish_bundle_states (void);
242 static unsigned bundle_state_hash (const void *);
243 static int bundle_state_eq_p (const void *, const void *);
244 static int insert_bundle_state (struct bundle_state *);
245 static void initiate_bundle_state_table (void);
246 static void finish_bundle_state_table (void);
247 static int try_issue_nops (struct bundle_state *, int);
248 static int try_issue_insn (struct bundle_state *, rtx);
249 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
250 static int get_max_pos (state_t);
251 static int get_template (state_t, int);
253 static rtx get_next_important_insn (rtx, rtx);
254 static bool important_for_bundling_p (rtx);
255 static void bundling (FILE *, int, rtx, rtx);
257 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
258 HOST_WIDE_INT, tree);
259 static void ia64_file_start (void);
260 static void ia64_globalize_decl_name (FILE *, tree);
262 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
263 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
264 static section *ia64_select_rtx_section (enum machine_mode, rtx,
265 unsigned HOST_WIDE_INT);
266 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
268 static unsigned int ia64_section_type_flags (tree, const char *, int);
269 static void ia64_init_libfuncs (void)
271 static void ia64_hpux_init_libfuncs (void)
273 static void ia64_sysv4_init_libfuncs (void)
275 static void ia64_vms_init_libfuncs (void)
277 static void ia64_soft_fp_init_libfuncs (void)
280 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
281 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
282 static void ia64_encode_section_info (tree, rtx, int);
283 static rtx ia64_struct_value_rtx (tree, int);
284 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
285 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
286 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
287 static bool ia64_cannot_force_const_mem (rtx);
288 static const char *ia64_mangle_type (const_tree);
289 static const char *ia64_invalid_conversion (const_tree, const_tree);
290 static const char *ia64_invalid_unary_op (int, const_tree);
291 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
292 static enum machine_mode ia64_c_mode_for_suffix (char);
294 /* Table of valid machine attributes. */
295 static const struct attribute_spec ia64_attribute_table[] =
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
298 { "syscall_linkage", 0, 0, false, true, true, NULL },
299 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
300 { "version_id", 1, 1, true, false, false,
301 ia64_handle_version_id_attribute },
302 { NULL, 0, 0, false, false, false, NULL }
305 /* Initialize the GCC target structure. */
306 #undef TARGET_ATTRIBUTE_TABLE
307 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
309 #undef TARGET_INIT_BUILTINS
310 #define TARGET_INIT_BUILTINS ia64_init_builtins
312 #undef TARGET_EXPAND_BUILTIN
313 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
315 #undef TARGET_ASM_BYTE_OP
316 #define TARGET_ASM_BYTE_OP "\tdata1\t"
317 #undef TARGET_ASM_ALIGNED_HI_OP
318 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
319 #undef TARGET_ASM_ALIGNED_SI_OP
320 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
321 #undef TARGET_ASM_ALIGNED_DI_OP
322 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
323 #undef TARGET_ASM_UNALIGNED_HI_OP
324 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
325 #undef TARGET_ASM_UNALIGNED_SI_OP
326 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
327 #undef TARGET_ASM_UNALIGNED_DI_OP
328 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
329 #undef TARGET_ASM_INTEGER
330 #define TARGET_ASM_INTEGER ia64_assemble_integer
332 #undef TARGET_ASM_FUNCTION_PROLOGUE
333 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
334 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
335 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
336 #undef TARGET_ASM_FUNCTION_EPILOGUE
337 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
339 #undef TARGET_IN_SMALL_DATA_P
340 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
342 #undef TARGET_SCHED_ADJUST_COST_2
343 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
344 #undef TARGET_SCHED_ISSUE_RATE
345 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
346 #undef TARGET_SCHED_VARIABLE_ISSUE
347 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
348 #undef TARGET_SCHED_INIT
349 #define TARGET_SCHED_INIT ia64_sched_init
350 #undef TARGET_SCHED_FINISH
351 #define TARGET_SCHED_FINISH ia64_sched_finish
352 #undef TARGET_SCHED_INIT_GLOBAL
353 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
354 #undef TARGET_SCHED_FINISH_GLOBAL
355 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
356 #undef TARGET_SCHED_REORDER
357 #define TARGET_SCHED_REORDER ia64_sched_reorder
358 #undef TARGET_SCHED_REORDER2
359 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
361 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
362 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
364 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
365 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
367 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
368 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
369 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
370 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
372 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
373 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
374 ia64_first_cycle_multipass_dfa_lookahead_guard
376 #undef TARGET_SCHED_DFA_NEW_CYCLE
377 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
379 #undef TARGET_SCHED_H_I_D_EXTENDED
380 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
382 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
383 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
385 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
386 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
388 #undef TARGET_SCHED_SET_SCHED_CONTEXT
389 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
391 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
392 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
394 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
395 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
397 #undef TARGET_SCHED_SET_SCHED_FLAGS
398 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
400 #undef TARGET_SCHED_GET_INSN_SPEC_DS
401 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
403 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
404 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
406 #undef TARGET_SCHED_SPECULATE_INSN
407 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
409 #undef TARGET_SCHED_NEEDS_BLOCK_P
410 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
412 #undef TARGET_SCHED_GEN_SPEC_CHECK
413 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
415 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
416 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
417 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
419 #undef TARGET_SCHED_SKIP_RTX_P
420 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
423 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
424 #undef TARGET_ARG_PARTIAL_BYTES
425 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
427 #undef TARGET_ASM_OUTPUT_MI_THUNK
428 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
429 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
430 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
432 #undef TARGET_ASM_FILE_START
433 #define TARGET_ASM_FILE_START ia64_file_start
435 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
436 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
438 #undef TARGET_RTX_COSTS
439 #define TARGET_RTX_COSTS ia64_rtx_costs
440 #undef TARGET_ADDRESS_COST
441 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
443 #undef TARGET_UNSPEC_MAY_TRAP_P
444 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
449 #undef TARGET_ENCODE_SECTION_INFO
450 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
452 #undef TARGET_SECTION_TYPE_FLAGS
453 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
456 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
457 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
460 /* ??? ABI doesn't allow us to define this. */
462 #undef TARGET_PROMOTE_FUNCTION_ARGS
463 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
466 /* ??? ABI doesn't allow us to define this. */
468 #undef TARGET_PROMOTE_FUNCTION_RETURN
469 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
472 /* ??? Investigate. */
474 #undef TARGET_PROMOTE_PROTOTYPES
475 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
478 #undef TARGET_STRUCT_VALUE_RTX
479 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
480 #undef TARGET_RETURN_IN_MEMORY
481 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
482 #undef TARGET_SETUP_INCOMING_VARARGS
483 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
484 #undef TARGET_STRICT_ARGUMENT_NAMING
485 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
486 #undef TARGET_MUST_PASS_IN_STACK
487 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
489 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
490 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
492 #undef TARGET_UNWIND_EMIT
493 #define TARGET_UNWIND_EMIT process_for_unwind_directive
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
500 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
501 in an order different from the specified program order. */
502 #undef TARGET_RELAXED_ORDERING
503 #define TARGET_RELAXED_ORDERING true
505 #undef TARGET_DEFAULT_TARGET_FLAGS
506 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
507 #undef TARGET_HANDLE_OPTION
508 #define TARGET_HANDLE_OPTION ia64_handle_option
510 #undef TARGET_CANNOT_FORCE_CONST_MEM
511 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
513 #undef TARGET_MANGLE_TYPE
514 #define TARGET_MANGLE_TYPE ia64_mangle_type
516 #undef TARGET_INVALID_CONVERSION
517 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
518 #undef TARGET_INVALID_UNARY_OP
519 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
520 #undef TARGET_INVALID_BINARY_OP
521 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
523 #undef TARGET_C_MODE_FOR_SUFFIX
524 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
526 struct gcc_target targetm = TARGET_INITIALIZER;
530 ADDR_AREA_NORMAL, /* normal address area */
531 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
535 static GTY(()) tree small_ident1;
536 static GTY(()) tree small_ident2;
541 if (small_ident1 == 0)
543 small_ident1 = get_identifier ("small");
544 small_ident2 = get_identifier ("__small__");
548 /* Retrieve the address area that has been chosen for the given decl. */
550 static ia64_addr_area
551 ia64_get_addr_area (tree decl)
555 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
561 id = TREE_VALUE (TREE_VALUE (model_attr));
562 if (id == small_ident1 || id == small_ident2)
563 return ADDR_AREA_SMALL;
565 return ADDR_AREA_NORMAL;
569 ia64_handle_model_attribute (tree *node, tree name, tree args,
570 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
572 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
574 tree arg, decl = *node;
577 arg = TREE_VALUE (args);
578 if (arg == small_ident1 || arg == small_ident2)
580 addr_area = ADDR_AREA_SMALL;
584 warning (OPT_Wattributes, "invalid argument of %qE attribute",
586 *no_add_attrs = true;
589 switch (TREE_CODE (decl))
592 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
594 && !TREE_STATIC (decl))
596 error ("%Jan address area attribute cannot be specified for "
597 "local variables", decl);
598 *no_add_attrs = true;
600 area = ia64_get_addr_area (decl);
601 if (area != ADDR_AREA_NORMAL && addr_area != area)
603 error ("address area of %q+D conflicts with previous "
604 "declaration", decl);
605 *no_add_attrs = true;
610 error ("%Jaddress area attribute cannot be specified for functions",
612 *no_add_attrs = true;
616 warning (OPT_Wattributes, "%qE attribute ignored",
618 *no_add_attrs = true;
626 ia64_encode_addr_area (tree decl, rtx symbol)
630 flags = SYMBOL_REF_FLAGS (symbol);
631 switch (ia64_get_addr_area (decl))
633 case ADDR_AREA_NORMAL: break;
634 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
635 default: gcc_unreachable ();
637 SYMBOL_REF_FLAGS (symbol) = flags;
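/* As a usage sketch (not part of this file): the "model" attribute handled
   above is how user code asks for the small address area encoded here.
   The declaration below is hypothetical and only illustrates the syntax. */
#if 0 /* Illustrative sketch; not compiled as part of ia64.c. */
/* Static storage duration is required; see ia64_handle_model_attribute. */
static int counter __attribute__ ((model ("small")));

int
bump_counter (void)
{
  /* COUNTER's SYMBOL_REF gets SYMBOL_FLAG_SMALL_ADDR, so it can be
     addressed in the "addl"-reachable area described above. */
  return ++counter;
}
#endif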
641 ia64_encode_section_info (tree decl, rtx rtl, int first)
643 default_encode_section_info (decl, rtl, first);
645 /* Careful not to prod global register variables. */
646 if (TREE_CODE (decl) == VAR_DECL
647 && GET_CODE (DECL_RTL (decl)) == MEM
648 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
649 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
650 ia64_encode_addr_area (decl, XEXP (rtl, 0));
653 /* Return 1 if the operands of a move are ok. */
656 ia64_move_ok (rtx dst, rtx src)
658 /* If we're under init_recog_no_volatile, we'll not be able to use
659 memory_operand. So check the code directly and don't worry about
660 the validity of the underlying address, which should have been
661 checked elsewhere anyway. */
662 if (GET_CODE (dst) != MEM)
664 if (GET_CODE (src) == MEM)
666 if (register_operand (src, VOIDmode))
669 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
670 if (INTEGRAL_MODE_P (GET_MODE (dst)))
671 return src == const0_rtx;
673 return satisfies_constraint_G (src);
676 /* Return 1 if the operands are ok for a floating point load pair. */
679 ia64_load_pair_ok (rtx dst, rtx src)
681 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
683 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
685 switch (GET_CODE (XEXP (src, 0)))
694 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
696 if (GET_CODE (adjust) != CONST_INT
697 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
708 addp4_optimize_ok (rtx op1, rtx op2)
710 return (basereg_operand (op1, GET_MODE(op1)) !=
711 basereg_operand (op2, GET_MODE(op2)));
714 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
715 Return the length of the field, or <= 0 on failure. */
718 ia64_depz_field_mask (rtx rop, rtx rshift)
720 unsigned HOST_WIDE_INT op = INTVAL (rop);
721 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
723 /* Get rid of the zero bits we're shifting in. */
726 /* We must now have a solid block of 1's at bit 0. */
727 return exact_log2 (op + 1);
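/* A stand-alone restatement of the check above, as a worked example.
   DEPZ_FIELD_WIDTH is a hypothetical helper that mirrors the
   exact_log2 (op + 1) test; illustrative only, not part of the build. */
#if 0
#include <stdint.h>
#include <stdio.h>

static int
depz_field_width (uint64_t mask, unsigned shift)
{
  /* Get rid of the zero bits shifted in, then require a solid block
     of ones at bit 0: op + 1 must be a power of two. */
  uint64_t op = mask >> shift;
  if (op == 0 || (op & (op + 1)) != 0)
    return -1;
  int width = 0;
  while (op >>= 1)
    width++;
  return width + 1;
}

int
main (void)
{
  printf ("%d\n", depz_field_width (0xff0, 4));   /* 8: deposit 8 bits at 4 */
  printf ("%d\n", depz_field_width (0xf0f0, 4));  /* -1: mask has a hole */
  return 0;
}
#endif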
730 /* Return the TLS model to use for ADDR. */
732 static enum tls_model
733 tls_symbolic_operand_type (rtx addr)
735 enum tls_model tls_kind = TLS_MODEL_NONE;
737 if (GET_CODE (addr) == CONST)
739 if (GET_CODE (XEXP (addr, 0)) == PLUS
740 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
741 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
743 else if (GET_CODE (addr) == SYMBOL_REF)
744 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
749 /* Return true if X is a constant that is valid for some immediate
750 field in an instruction. */
753 ia64_legitimate_constant_p (rtx x)
755 switch (GET_CODE (x))
762 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
763 || GET_MODE (x) == DFmode)
765 return satisfies_constraint_G (x);
769 /* ??? Short term workaround for PR 28490. We must make the code here
770 match the code in ia64_expand_move and move_operand, even though they
771 are both technically wrong. */
772 if (tls_symbolic_operand_type (x) == 0)
774 HOST_WIDE_INT addend = 0;
777 if (GET_CODE (op) == CONST
778 && GET_CODE (XEXP (op, 0)) == PLUS
779 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
781 addend = INTVAL (XEXP (XEXP (op, 0), 1));
782 op = XEXP (XEXP (op, 0), 0);
785 if (any_offset_symbol_operand (op, GET_MODE (op))
786 || function_operand (op, GET_MODE (op)))
788 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
789 return (addend & 0x3fff) == 0;
796 enum machine_mode mode = GET_MODE (x);
798 if (mode == V2SFmode)
799 return satisfies_constraint_Y (x);
801 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
802 && GET_MODE_SIZE (mode) <= 8);
810 /* Don't allow TLS addresses to get spilled to memory. */
813 ia64_cannot_force_const_mem (rtx x)
815 if (GET_MODE (x) == RFmode)
817 return tls_symbolic_operand_type (x) != 0;
820 /* Expand a symbolic constant load. */
823 ia64_expand_load_address (rtx dest, rtx src)
825 gcc_assert (GET_CODE (dest) == REG);
827 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
828 having to pointer-extend the value afterward. Other forms of address
829 computation below are also more natural to compute as 64-bit quantities.
830 If we've been given an SImode destination register, change it. */
831 if (GET_MODE (dest) != Pmode)
832 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
833 byte_lowpart_offset (Pmode, GET_MODE (dest)));
837 if (small_addr_symbolic_operand (src, VOIDmode))
841 emit_insn (gen_load_gprel64 (dest, src));
842 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
843 emit_insn (gen_load_fptr (dest, src));
844 else if (sdata_symbolic_operand (src, VOIDmode))
845 emit_insn (gen_load_gprel (dest, src));
848 HOST_WIDE_INT addend = 0;
851 /* We did split constant offsets in ia64_expand_move, and we did try
852 to keep them split in move_operand, but we also allowed reload to
853 rematerialize arbitrary constants rather than spill the value to
854 the stack and reload it. So we have to be prepared here to split
856 if (GET_CODE (src) == CONST)
858 HOST_WIDE_INT hi, lo;
860 hi = INTVAL (XEXP (XEXP (src, 0), 1));
861 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
867 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
871 tmp = gen_rtx_HIGH (Pmode, src);
872 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
873 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
875 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
876 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
880 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
881 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
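/* The addend split just above (and the identical computation in
   ia64_expand_move and ia64_expand_tls_address) separates an offset into
   a sign-extended 14-bit low part plus a 16 KB-aligned remainder.  A
   stand-alone check of that identity; illustrative only. */
#if 0
#include <assert.h>
#include <stdio.h>

int
main (void)
{
  long long examples[] = { 0, 1, 0x1fff, 0x2000, 0x3fff, 0x12345, -1, -0x2001 };
  for (unsigned i = 0; i < sizeof examples / sizeof *examples; i++)
    {
      long long hi = examples[i];
      /* Same arithmetic as above: LO is HI sign-extended from 14 bits. */
      long long lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
      long long rest = hi - lo;
      assert (lo >= -0x2000 && lo < 0x2000);   /* fits a 14-bit immediate */
      assert ((rest & 0x3fff) == 0);           /* remainder is aligned */
      printf ("%lld = %lld + %lld\n", hi, rest, lo);
    }
  return 0;
}
#endif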
888 static GTY(()) rtx gen_tls_tga;
890 gen_tls_get_addr (void)
893 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
897 static GTY(()) rtx thread_pointer_rtx;
899 gen_thread_pointer (void)
901 if (!thread_pointer_rtx)
902 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
903 return thread_pointer_rtx;
907 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
908 rtx orig_op1, HOST_WIDE_INT addend)
910 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
912 HOST_WIDE_INT addend_lo, addend_hi;
916 case TLS_MODEL_GLOBAL_DYNAMIC:
919 tga_op1 = gen_reg_rtx (Pmode);
920 emit_insn (gen_load_dtpmod (tga_op1, op1));
922 tga_op2 = gen_reg_rtx (Pmode);
923 emit_insn (gen_load_dtprel (tga_op2, op1));
925 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
926 LCT_CONST, Pmode, 2, tga_op1,
927 Pmode, tga_op2, Pmode);
929 insns = get_insns ();
932 if (GET_MODE (op0) != Pmode)
934 emit_libcall_block (insns, op0, tga_ret, op1);
937 case TLS_MODEL_LOCAL_DYNAMIC:
938 /* ??? This isn't the completely proper way to do local-dynamic.
939 If the call to __tls_get_addr is used only by a single symbol,
940 then we should (somehow) move the dtprel to the second arg
941 to avoid the extra add. */
944 tga_op1 = gen_reg_rtx (Pmode);
945 emit_insn (gen_load_dtpmod (tga_op1, op1));
947 tga_op2 = const0_rtx;
949 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
950 LCT_CONST, Pmode, 2, tga_op1,
951 Pmode, tga_op2, Pmode);
953 insns = get_insns ();
956 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
958 tmp = gen_reg_rtx (Pmode);
959 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
961 if (!register_operand (op0, Pmode))
962 op0 = gen_reg_rtx (Pmode);
965 emit_insn (gen_load_dtprel (op0, op1));
966 emit_insn (gen_adddi3 (op0, tmp, op0));
969 emit_insn (gen_add_dtprel (op0, op1, tmp));
972 case TLS_MODEL_INITIAL_EXEC:
973 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
974 addend_hi = addend - addend_lo;
976 op1 = plus_constant (op1, addend_hi);
979 tmp = gen_reg_rtx (Pmode);
980 emit_insn (gen_load_tprel (tmp, op1));
982 if (!register_operand (op0, Pmode))
983 op0 = gen_reg_rtx (Pmode);
984 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
987 case TLS_MODEL_LOCAL_EXEC:
988 if (!register_operand (op0, Pmode))
989 op0 = gen_reg_rtx (Pmode);
995 emit_insn (gen_load_tprel (op0, op1));
996 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
999 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1007 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1008 orig_op0, 1, OPTAB_DIRECT);
1009 if (orig_op0 == op0)
1011 if (GET_MODE (orig_op0) == Pmode)
1013 return gen_lowpart (GET_MODE (orig_op0), op0);
1017 ia64_expand_move (rtx op0, rtx op1)
1019 enum machine_mode mode = GET_MODE (op0);
1021 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1022 op1 = force_reg (mode, op1);
1024 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1026 HOST_WIDE_INT addend = 0;
1027 enum tls_model tls_kind;
1030 if (GET_CODE (op1) == CONST
1031 && GET_CODE (XEXP (op1, 0)) == PLUS
1032 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1034 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1035 sym = XEXP (XEXP (op1, 0), 0);
1038 tls_kind = tls_symbolic_operand_type (sym);
1040 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1042 if (any_offset_symbol_operand (sym, mode))
1044 else if (aligned_offset_symbol_operand (sym, mode))
1046 HOST_WIDE_INT addend_lo, addend_hi;
1048 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1049 addend_hi = addend - addend_lo;
1053 op1 = plus_constant (sym, addend_hi);
1062 if (reload_completed)
1064 /* We really should have taken care of this offset earlier. */
1065 gcc_assert (addend == 0);
1066 if (ia64_expand_load_address (op0, op1))
1072 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1074 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1076 op1 = expand_simple_binop (mode, PLUS, subtarget,
1077 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1086 /* Split a move from OP1 to OP0 conditional on COND. */
1089 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1091 rtx insn, first = get_last_insn ();
1093 emit_move_insn (op0, op1);
1095 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1097 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1101 /* Split a post-reload TImode or TFmode reference into two DImode
1102 components. This is made extra difficult by the fact that we do
1103 not get any scratch registers to work with, because reload cannot
1104 be prevented from giving us a scratch that overlaps the register
1105 pair involved. So instead, when addressing memory, we tweak the
1106 pointer register up and back down with POST_INCs. Or up and not
1107 back down when we can get away with it.
1109 REVERSED is true when the loads must be done in reversed order
1110 (high word first) for correctness. DEAD is true when the pointer
1111 dies with the second insn we generate and therefore the second
1112 address must not carry a postmodify.
1114 May return an insn which is to be emitted after the moves. */
1117 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1121 switch (GET_CODE (in))
1124 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1125 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1130 /* Cannot occur reversed. */
1131 gcc_assert (!reversed);
1133 if (GET_MODE (in) != TFmode)
1134 split_double (in, &out[0], &out[1]);
1136 /* split_double does not understand how to split a TFmode
1137 quantity into a pair of DImode constants. */
1140 unsigned HOST_WIDE_INT p[2];
1141 long l[4]; /* TFmode is 128 bits */
1143 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1144 real_to_target (l, &r, TFmode);
1146 if (FLOAT_WORDS_BIG_ENDIAN)
1148 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1149 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1153 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1154 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1156 out[0] = GEN_INT (p[0]);
1157 out[1] = GEN_INT (p[1]);
1163 rtx base = XEXP (in, 0);
1166 switch (GET_CODE (base))
1171 out[0] = adjust_automodify_address
1172 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1173 out[1] = adjust_automodify_address
1174 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1178 /* Reversal requires a pre-increment, which can only
1179 be done as a separate insn. */
1180 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1181 out[0] = adjust_automodify_address
1182 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1183 out[1] = adjust_address (in, DImode, 0);
1188 gcc_assert (!reversed && !dead);
1190 /* Just do the increment in two steps. */
1191 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1192 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1196 gcc_assert (!reversed && !dead);
1198 /* Add 8, subtract 24. */
1199 base = XEXP (base, 0);
1200 out[0] = adjust_automodify_address
1201 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1202 out[1] = adjust_automodify_address
1204 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1209 gcc_assert (!reversed && !dead);
1211 /* Extract and adjust the modification. This case is
1212 trickier than the others, because we might have an
1213 index register, or we might have a combined offset that
1214 doesn't fit a signed 9-bit displacement field. We can
1215 assume the incoming expression is already legitimate. */
1216 offset = XEXP (base, 1);
1217 base = XEXP (base, 0);
1219 out[0] = adjust_automodify_address
1220 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1222 if (GET_CODE (XEXP (offset, 1)) == REG)
1224 /* Can't adjust the postmodify to match. Emit the
1225 original, then a separate addition insn. */
1226 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1227 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1231 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1232 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1234 /* Again the postmodify cannot be made to match,
1235 but in this case it's more efficient to get rid
1236 of the postmodify entirely and fix up with an
1237 add insn. */
1238 out[1] = adjust_automodify_address (in, DImode, base, 8);
1240 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1244 /* Combined offset still fits in the displacement field.
1245 (We cannot overflow it at the high end.) */
1246 out[1] = adjust_automodify_address
1247 (in, DImode, gen_rtx_POST_MODIFY
1248 (Pmode, base, gen_rtx_PLUS
1250 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1269 /* Split a TImode or TFmode move instruction after reload.
1270 This is used by *movtf_internal and *movti_internal. */
1272 ia64_split_tmode_move (rtx operands[])
1274 rtx in[2], out[2], insn;
1277 bool reversed = false;
1279 /* It is possible for reload to decide to overwrite a pointer with
1280 the value it points to. In that case we have to do the loads in
1281 the appropriate order so that the pointer is not destroyed too
1282 early. Also we must not generate a postmodify for that second
1283 load, or rws_access_regno will die. */
1284 if (GET_CODE (operands[1]) == MEM
1285 && reg_overlap_mentioned_p (operands[0], operands[1]))
1287 rtx base = XEXP (operands[1], 0);
1288 while (GET_CODE (base) != REG)
1289 base = XEXP (base, 0);
1291 if (REGNO (base) == REGNO (operands[0]))
1295 /* Another reason to do the moves in reversed order is if the first
1296 element of the target register pair is also the second element of
1297 the source register pair. */
1298 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1299 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1302 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1303 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1305 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1306 if (GET_CODE (EXP) == MEM \
1307 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1308 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1309 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1310 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1312 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1313 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1314 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1316 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1317 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1318 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1321 emit_insn (fixup[0]);
1323 emit_insn (fixup[1]);
1325 #undef MAYBE_ADD_REG_INC_NOTE
1328 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1329 through memory plus an extra GR scratch register. Except that you can
1330 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1331 SECONDARY_RELOAD_CLASS, but not both.
1333 We got into problems in the first place by allowing a construct like
1334 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1335 This solution attempts to prevent this situation from occurring. When
1336 we see something like the above, we spill the inner register to memory. */
1339 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1341 if (GET_CODE (in) == SUBREG
1342 && GET_MODE (SUBREG_REG (in)) == TImode
1343 && GET_CODE (SUBREG_REG (in)) == REG)
1345 rtx memt = assign_stack_temp (TImode, 16, 0);
1346 emit_move_insn (memt, SUBREG_REG (in));
1347 return adjust_address (memt, mode, 0);
1349 else if (force && GET_CODE (in) == REG)
1351 rtx memx = assign_stack_temp (mode, 16, 0);
1352 emit_move_insn (memx, in);
1359 /* Expand the movxf or movrf pattern (MODE says which) with the given
1360 OPERANDS, returning true if the pattern should then invoke
1361 DONE. */
1364 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1366 rtx op0 = operands[0];
1368 if (GET_CODE (op0) == SUBREG)
1369 op0 = SUBREG_REG (op0);
1371 /* We must support XFmode loads into general registers for stdarg/vararg,
1372 unprototyped calls, and a rare case where a long double is passed as
1373 an argument after a float HFA fills the FP registers. We split them into
1374 DImode loads for convenience. We also need to support XFmode stores
1375 for the last case. This case does not happen for stdarg/vararg routines,
1376 because we do a block store to memory of unnamed arguments. */
1378 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1382 /* We're hoping to transform everything that deals with XFmode
1383 quantities and GR registers early in the compiler. */
1384 gcc_assert (can_create_pseudo_p ());
1386 /* Struct to register can just use TImode instead. */
1387 if ((GET_CODE (operands[1]) == SUBREG
1388 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1389 || (GET_CODE (operands[1]) == REG
1390 && GR_REGNO_P (REGNO (operands[1]))))
1392 rtx op1 = operands[1];
1394 if (GET_CODE (op1) == SUBREG)
1395 op1 = SUBREG_REG (op1);
1397 op1 = gen_rtx_REG (TImode, REGNO (op1));
1399 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1403 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1405 /* Don't word-swap when reading in the constant. */
1406 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1407 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1409 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1410 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1415 /* If the quantity is in a register not known to be GR, spill it. */
1416 if (register_operand (operands[1], mode))
1417 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1419 gcc_assert (GET_CODE (operands[1]) == MEM);
1421 /* Don't word-swap when reading in the value. */
1422 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1423 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1425 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1426 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1430 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1432 /* We're hoping to transform everything that deals with XFmode
1433 quantities and GR registers early in the compiler. */
1434 gcc_assert (can_create_pseudo_p ());
1436 /* Op0 can't be a GR_REG here, as that case is handled above.
1437 If op0 is a register, then we spill op1, so that we now have a
1438 MEM operand. This requires creating an XFmode subreg of a TImode reg
1439 to force the spill. */
1440 if (register_operand (operands[0], mode))
1442 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1443 op1 = gen_rtx_SUBREG (mode, op1, 0);
1444 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1451 gcc_assert (GET_CODE (operands[0]) == MEM);
1453 /* Don't word-swap when writing out the value. */
1454 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1455 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1457 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1458 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1463 if (!reload_in_progress && !reload_completed)
1465 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1467 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1469 rtx memt, memx, in = operands[1];
1470 if (CONSTANT_P (in))
1471 in = validize_mem (force_const_mem (mode, in));
1472 if (GET_CODE (in) == MEM)
1473 memt = adjust_address (in, TImode, 0);
1476 memt = assign_stack_temp (TImode, 16, 0);
1477 memx = adjust_address (memt, mode, 0);
1478 emit_move_insn (memx, in);
1480 emit_move_insn (op0, memt);
1484 if (!ia64_move_ok (operands[0], operands[1]))
1485 operands[1] = force_reg (mode, operands[1]);
1491 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1492 with the expression that holds the compare result (in VOIDmode). */
1494 static GTY(()) rtx cmptf_libfunc;
1497 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1499 enum rtx_code code = GET_CODE (*expr);
1502 /* If we have a BImode input, then we already have a compare result, and
1503 do not need to emit another comparison. */
1504 if (GET_MODE (*op0) == BImode)
1506 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1509 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1510 magic number as its third argument that indicates what to do.
1511 The return value is an integer to be compared against zero. */
1512 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1515 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1522 enum rtx_code ncode;
1525 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1528 /* 1 = equal, 0 = not equal. Equality operators do
1529 not raise FP_INVALID when given an SNaN operand. */
1530 case EQ: magic = QCMP_EQ; ncode = NE; break;
1531 case NE: magic = QCMP_EQ; ncode = EQ; break;
1532 /* isunordered() from C99. */
1533 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1534 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1535 /* Relational operators raise FP_INVALID when given
1536 an SNaN operand. */
1537 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1538 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1539 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1540 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1541 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1542 Expanders for buneq etc. would have to be added to ia64.md
1543 for this to be useful. */
1544 default: gcc_unreachable ();
1549 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1550 *op0, TFmode, *op1, TFmode,
1551 GEN_INT (magic), DImode);
1552 cmp = gen_reg_rtx (BImode);
1553 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1554 gen_rtx_fmt_ee (ncode, BImode,
1557 insns = get_insns ();
1560 emit_libcall_block (insns, cmp, cmp,
1561 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1566 cmp = gen_reg_rtx (BImode);
1567 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1568 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1572 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1577 /* Generate an integral vector comparison. Return true if the condition has
1578 been reversed, and so the sense of the comparison should be inverted. */
1581 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1582 rtx dest, rtx op0, rtx op1)
1584 bool negate = false;
1587 /* Canonicalize the comparison to EQ, GT, GTU. */
1598 code = reverse_condition (code);
1604 code = reverse_condition (code);
1610 code = swap_condition (code);
1611 x = op0, op0 = op1, op1 = x;
1618 /* Unsigned parallel compare is not supported by the hardware. Play some
1619 tricks to turn this into a signed comparison against 0. */
1628 /* Perform a parallel modulo subtraction. */
1629 t1 = gen_reg_rtx (V2SImode);
1630 emit_insn (gen_subv2si3 (t1, op0, op1));
1632 /* Extract the original sign bit of op0. */
1633 mask = GEN_INT (-0x80000000);
1634 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1635 mask = force_reg (V2SImode, mask);
1636 t2 = gen_reg_rtx (V2SImode);
1637 emit_insn (gen_andv2si3 (t2, op0, mask));
1639 /* XOR it back into the result of the subtraction. This results
1640 in the sign bit set iff we saw unsigned underflow. */
1641 x = gen_reg_rtx (V2SImode);
1642 emit_insn (gen_xorv2si3 (x, t1, t2));
1646 op1 = CONST0_RTX (mode);
1652 /* Perform a parallel unsigned saturating subtraction. */
1653 x = gen_reg_rtx (mode);
1654 emit_insn (gen_rtx_SET (VOIDmode, x,
1655 gen_rtx_US_MINUS (mode, op0, op1)));
1659 op1 = CONST0_RTX (mode);
1668 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1669 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
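/* A scalar sanity check of the saturating-subtraction trick used for the
   GTU case above: unsigned a > b exactly when the saturating difference
   is nonzero, which is what allows the compare against zero.  US_MINUS
   below is a hypothetical one-lane stand-in; illustrative only. */
#if 0
#include <assert.h>
#include <stdint.h>

static uint16_t
us_minus (uint16_t a, uint16_t b)
{
  /* One lane of a parallel unsigned saturating subtraction. */
  return a > b ? a - b : 0;
}

int
main (void)
{
  for (unsigned a = 0; a < 0x10000; a += 0x111)
    for (unsigned b = 0; b < 0x10000; b += 0x137)
      assert ((a > b) == (us_minus (a, b) != 0));
  return 0;
}
#endif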
1674 /* Emit an integral vector conditional move. */
1677 ia64_expand_vecint_cmov (rtx operands[])
1679 enum machine_mode mode = GET_MODE (operands[0]);
1680 enum rtx_code code = GET_CODE (operands[3]);
1684 cmp = gen_reg_rtx (mode);
1685 negate = ia64_expand_vecint_compare (code, mode, cmp,
1686 operands[4], operands[5]);
1688 ot = operands[1+negate];
1689 of = operands[2-negate];
1691 if (ot == CONST0_RTX (mode))
1693 if (of == CONST0_RTX (mode))
1695 emit_move_insn (operands[0], ot);
1699 x = gen_rtx_NOT (mode, cmp);
1700 x = gen_rtx_AND (mode, x, of);
1701 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1703 else if (of == CONST0_RTX (mode))
1705 x = gen_rtx_AND (mode, cmp, ot);
1706 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1712 t = gen_reg_rtx (mode);
1713 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1714 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1716 f = gen_reg_rtx (mode);
1717 x = gen_rtx_NOT (mode, cmp);
1718 x = gen_rtx_AND (mode, x, operands[2-negate]);
1719 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1721 x = gen_rtx_IOR (mode, t, f);
1722 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1726 /* Emit an integral vector min or max operation. Return true if all done. */
1729 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1734 /* These four combinations are supported directly. */
1735 if (mode == V8QImode && (code == UMIN || code == UMAX))
1737 if (mode == V4HImode && (code == SMIN || code == SMAX))
1740 /* This combination can be implemented with only saturating subtraction. */
1741 if (mode == V4HImode && code == UMAX)
1743 rtx x, tmp = gen_reg_rtx (mode);
1745 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1746 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1748 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1752 /* Everything else implemented via vector comparisons. */
1753 xops[0] = operands[0];
1754 xops[4] = xops[1] = operands[1];
1755 xops[5] = xops[2] = operands[2];
1774 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1776 ia64_expand_vecint_cmov (xops);
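/* A scalar check of the identity behind the V4HImode UMAX case above:
   umax (a, b) == us_minus (a, b) + b, because the saturating difference
   is a - b when a is larger and 0 otherwise.  Illustrative only. */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  for (unsigned a = 0; a < 0x10000; a += 0x123)
    for (unsigned b = 0; b < 0x10000; b += 0x145)
      {
	uint16_t diff = a > b ? (uint16_t) (a - b) : 0; /* saturating sub */
	uint16_t max = a > b ? (uint16_t) a : (uint16_t) b;
	assert ((uint16_t) (diff + b) == max);
      }
  return 0;
}
#endif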
1780 /* Emit an integral vector widening sum operation. */
1783 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1786 enum machine_mode wmode, mode;
1787 rtx (*unpack_l) (rtx, rtx, rtx);
1788 rtx (*unpack_h) (rtx, rtx, rtx);
1789 rtx (*plus) (rtx, rtx, rtx);
1791 wmode = GET_MODE (operands[0]);
1792 mode = GET_MODE (operands[1]);
1797 unpack_l = gen_unpack1_l;
1798 unpack_h = gen_unpack1_h;
1799 plus = gen_addv4hi3;
1802 unpack_l = gen_unpack2_l;
1803 unpack_h = gen_unpack2_h;
1804 plus = gen_addv2si3;
1810 /* Fill in x with the sign extension of each element in op1. */
1812 x = CONST0_RTX (mode);
1817 x = gen_reg_rtx (mode);
1819 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1824 l = gen_reg_rtx (wmode);
1825 h = gen_reg_rtx (wmode);
1826 s = gen_reg_rtx (wmode);
1828 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1829 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1830 emit_insn (plus (s, l, operands[2]));
1831 emit_insn (plus (operands[0], h, s));
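/* Reference semantics of the widening-sum expansion above for the signed
   V8QI -> V4HI case, as a scalar sketch (illustrative only): every byte
   of IN is sign-extended and added into some lane of ACC, so the total
   across lanes grows by the sum of the bytes, which is all the vectorizer
   relies on.  The lane assignment below is one possibility, not the exact
   interleaving produced by unpack1_l/unpack1_h. */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int8_t in[8] = { -128, 127, -3, 5, 0, 44, -90, 17 };
  int16_t acc[4] = { 100, -200, 300, -400 };

  long before = 0, bytes = 0;
  for (int i = 0; i < 4; i++)
    before += acc[i];
  for (int i = 0; i < 8; i++)
    bytes += in[i];

  for (int i = 0; i < 4; i++)
    acc[i] += in[2 * i] + in[2 * i + 1];

  long after = 0;
  for (int i = 0; i < 4; i++)
    after += acc[i];
  assert (after == before + bytes);
  return 0;
}
#endif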
1834 /* Emit a signed or unsigned V8QI dot product operation. */
1837 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1839 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1841 /* Fill in x1 and x2 with the sign extension of each element. */
1843 x1 = x2 = CONST0_RTX (V8QImode);
1848 x1 = gen_reg_rtx (V8QImode);
1849 x2 = gen_reg_rtx (V8QImode);
1851 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1852 CONST0_RTX (V8QImode));
1854 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1855 CONST0_RTX (V8QImode));
1859 l1 = gen_reg_rtx (V4HImode);
1860 l2 = gen_reg_rtx (V4HImode);
1861 h1 = gen_reg_rtx (V4HImode);
1862 h2 = gen_reg_rtx (V4HImode);
1864 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1865 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1866 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1867 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1869 p1 = gen_reg_rtx (V2SImode);
1870 p2 = gen_reg_rtx (V2SImode);
1871 p3 = gen_reg_rtx (V2SImode);
1872 p4 = gen_reg_rtx (V2SImode);
1873 emit_insn (gen_pmpy2_r (p1, l1, l2));
1874 emit_insn (gen_pmpy2_l (p2, l1, l2));
1875 emit_insn (gen_pmpy2_r (p3, h1, h2));
1876 emit_insn (gen_pmpy2_l (p4, h1, h2));
1878 s1 = gen_reg_rtx (V2SImode);
1879 s2 = gen_reg_rtx (V2SImode);
1880 s3 = gen_reg_rtx (V2SImode);
1881 emit_insn (gen_addv2si3 (s1, p1, p2));
1882 emit_insn (gen_addv2si3 (s2, p3, p4));
1883 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1884 emit_insn (gen_addv2si3 (operands[0], s2, s3));
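/* Reference semantics of the V8QI dot-product expansion above (signed
   case), as a scalar sketch (illustrative only): p1..p4 together hold
   every a[i]*b[i] exactly once, so the final additions grow the V2SI
   accumulator's lane total by the full dot product.  The two-lane split
   below is one possibility, not the exact pmpy2_r/pmpy2_l layout. */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int8_t a[8] = { 1, -2, 3, -4, 5, -6, 7, -8 };
  int8_t b[8] = { 9, 10, -11, 12, -13, 14, -15, 16 };
  int32_t acc[2] = { 1000, -2000 };

  long before = (long) acc[0] + acc[1], dot = 0;
  for (int i = 0; i < 8; i++)
    dot += a[i] * b[i];

  for (int i = 0; i < 8; i++)
    acc[i & 1] += a[i] * b[i];

  assert ((long) acc[0] + acc[1] == before + dot);
  return 0;
}
#endif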
1887 /* Emit the appropriate sequence for a call. */
1890 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1895 addr = XEXP (addr, 0);
1896 addr = convert_memory_address (DImode, addr);
1897 b0 = gen_rtx_REG (DImode, R_BR (0));
1899 /* ??? Should do this for functions known to bind local too. */
1900 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1903 insn = gen_sibcall_nogp (addr);
1905 insn = gen_call_nogp (addr, b0);
1907 insn = gen_call_value_nogp (retval, addr, b0);
1908 insn = emit_call_insn (insn);
1913 insn = gen_sibcall_gp (addr);
1915 insn = gen_call_gp (addr, b0);
1917 insn = gen_call_value_gp (retval, addr, b0);
1918 insn = emit_call_insn (insn);
1920 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1924 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1928 reg_emitted (enum ia64_frame_regs r)
1930 if (emitted_frame_related_regs[r] == 0)
1931 emitted_frame_related_regs[r] = current_frame_info.r[r];
1933 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
1937 get_reg (enum ia64_frame_regs r)
1940 return current_frame_info.r[r];
1944 is_emitted (int regno)
1946 enum ia64_frame_regs r;
1948 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
1949 if (emitted_frame_related_regs[r] == regno)
1955 ia64_reload_gp (void)
1959 if (current_frame_info.r[reg_save_gp])
1961 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
1965 HOST_WIDE_INT offset;
1968 offset = (current_frame_info.spill_cfa_off
1969 + current_frame_info.spill_size);
1970 if (frame_pointer_needed)
1972 tmp = hard_frame_pointer_rtx;
1977 tmp = stack_pointer_rtx;
1978 offset = current_frame_info.total_size - offset;
1981 offset_r = GEN_INT (offset);
1982 if (satisfies_constraint_I (offset_r))
1983 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
1986 emit_move_insn (pic_offset_table_rtx, offset_r);
1987 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1988 pic_offset_table_rtx, tmp));
1991 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1994 emit_move_insn (pic_offset_table_rtx, tmp);
1998 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1999 rtx scratch_b, int noreturn_p, int sibcall_p)
2002 bool is_desc = false;
2004 /* If we find we're calling through a register, then we're actually
2005 calling through a descriptor, so load up the values. */
2006 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2011 /* ??? We are currently constrained to *not* use peep2, because
2012 we can legitimately change the global lifetime of the GP
2013 (in the form of killing where previously live). This is
2014 because a call through a descriptor doesn't use the previous
2015 value of the GP, while a direct call does, and we do not
2016 commit to either form until the split here.
2018 That said, this means that we lack precise life info for
2019 whether ADDR is dead after this call. This is not terribly
2020 important, since we can fix things up essentially for free
2021 with the POST_DEC below, but it's nice to not use it when we
2022 can immediately tell it's not necessary. */
2023 addr_dead_p = ((noreturn_p || sibcall_p
2024 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2026 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2028 /* Load the code address into scratch_b. */
2029 tmp = gen_rtx_POST_INC (Pmode, addr);
2030 tmp = gen_rtx_MEM (Pmode, tmp);
2031 emit_move_insn (scratch_r, tmp);
2032 emit_move_insn (scratch_b, scratch_r);
2034 /* Load the GP address. If ADDR is not dead here, then we must
2035 revert the change made above via the POST_INCREMENT. */
2037 tmp = gen_rtx_POST_DEC (Pmode, addr);
2040 tmp = gen_rtx_MEM (Pmode, tmp);
2041 emit_move_insn (pic_offset_table_rtx, tmp);
2048 insn = gen_sibcall_nogp (addr);
2050 insn = gen_call_value_nogp (retval, addr, retaddr);
2052 insn = gen_call_nogp (addr, retaddr);
2053 emit_call_insn (insn);
2055 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2059 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2061 This differs from the generic code in that we know about the zero-extending
2062 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2063 also know that ld.acq+cmpxchg.rel equals a full barrier.
2065 The loop we want to generate looks like
2070 new_reg = cmp_reg op val;
2071 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2072 if (cmp_reg != old_reg)
2075 Note that we only do the plain load from memory once. Subsequent
2076 iterations use the value loaded by the compare-and-swap pattern. */
2079 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2080 rtx old_dst, rtx new_dst)
2082 enum machine_mode mode = GET_MODE (mem);
2083 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2084 enum insn_code icode;
2086 /* Special case for using fetchadd. */
2087 if ((mode == SImode || mode == DImode)
2088 && (code == PLUS || code == MINUS)
2089 && fetchadd_operand (val, mode))
2092 val = GEN_INT (-INTVAL (val));
2095 old_dst = gen_reg_rtx (mode);
2097 emit_insn (gen_memory_barrier ());
2100 icode = CODE_FOR_fetchadd_acq_si;
2102 icode = CODE_FOR_fetchadd_acq_di;
2103 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2107 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2109 if (new_reg != new_dst)
2110 emit_move_insn (new_dst, new_reg);
2115 /* Because of the volatile mem read, we get an ld.acq, which is the
2116 front half of the full barrier. The end half is the cmpxchg.rel. */
2117 gcc_assert (MEM_VOLATILE_P (mem));
2119 old_reg = gen_reg_rtx (DImode);
2120 cmp_reg = gen_reg_rtx (DImode);
2121 label = gen_label_rtx ();
2125 val = simplify_gen_subreg (DImode, val, mode, 0);
2126 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2129 emit_move_insn (cmp_reg, mem);
2133 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2134 emit_move_insn (old_reg, cmp_reg);
2135 emit_move_insn (ar_ccv, cmp_reg);
2138 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2143 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2144 true, OPTAB_DIRECT);
2145 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2148 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2149 true, OPTAB_DIRECT);
2152 new_reg = gen_lowpart (mode, new_reg);
2154 emit_move_insn (new_dst, new_reg);
2158 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2159 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2160 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2161 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2166 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2168 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
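/* Illustration only (not part of the original source): for a DImode
   atomic add, the loop built above corresponds roughly to

        ld8.acq      r32 = [r33]        // plain load, front half of barrier
   .Lretry:
        mov          ar.ccv = r32       // value cmpxchg must find in memory
        add          r34 = r32, r35     // new_reg = cmp_reg op val
        cmpxchg8.rel r36 = [r33], r34, ar.ccv
        cmp.ne       p6, p0 = r36, r32
        mov          r32 = r36
   (p6) br.cond.spnt .Lretry

   Register numbers and the label are arbitrary; the real insns come from
   the cmpxchg_rel_* (and, for the special case, fetchadd_acq_*) patterns
   in ia64.md.  */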
2171 /* Begin the assembly file. */
2174 ia64_file_start (void)
2176 /* Variable tracking should be run after all optimizations which change order
2177 of insns. It also needs a valid CFG. This can't be done in
2178 ia64_override_options, because flag_var_tracking is finalized after
2180 ia64_flag_var_tracking = flag_var_tracking;
2181 flag_var_tracking = 0;
2183 default_file_start ();
2184 emit_safe_across_calls ();
2188 emit_safe_across_calls (void)
2190 unsigned int rs, re;
2197 while (rs < 64 && call_used_regs[PR_REG (rs)])
2201 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2205 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2209 fputc (',', asm_out_file);
2211 fprintf (asm_out_file, "p%u", rs);
2213 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2217 fputc ('\n', asm_out_file);
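/* For example (illustration only): with the usual IA-64 convention that
   p1-p5 and p16-p63 are preserved across calls, the loop above emits

        .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicate registers survive calls.  */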
2220 /* Globalize a declaration. */
2223 ia64_globalize_decl_name (FILE * stream, tree decl)
2225 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2226 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2229 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2230 const char *p = TREE_STRING_POINTER (v);
2231 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2233 targetm.asm_out.globalize_label (stream, name);
2234 if (TREE_CODE (decl) == FUNCTION_DECL)
2235 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
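/* For example (illustration only), a declaration such as

     extern int foo (void) __attribute__ ((version_id ("20040821")));

   makes the fprintf above emit

        .alias foo#, "foo{20040821}"

   before the label is globalized in the usual way.  */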
2238 /* Helper function for ia64_compute_frame_size: find an appropriate general
2239 register to spill some special register to. current_frame_info.gr_used_mask
2240 contains bits in GR0 to GR31 that have already been allocated.
2241 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2244 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2248 if (emitted_frame_related_regs[r] != 0)
2250 regno = emitted_frame_related_regs[r];
2251 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2252 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2253 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2254 else if (current_function_is_leaf
2255 && regno >= GR_REG (1) && regno <= GR_REG (31))
2256 current_frame_info.gr_used_mask |= 1 << regno;
2261 /* If this is a leaf function, first try an otherwise unused
2262 call-clobbered register. */
2263 if (current_function_is_leaf)
2265 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2266 if (! df_regs_ever_live_p (regno)
2267 && call_used_regs[regno]
2268 && ! fixed_regs[regno]
2269 && ! global_regs[regno]
2270 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2271 && ! is_emitted (regno))
2273 current_frame_info.gr_used_mask |= 1 << regno;
2280 regno = current_frame_info.n_local_regs;
2281 /* If there is a frame pointer, then we can't use loc79, because
2282 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2283 reg_name switching code in ia64_expand_prologue. */
2284 while (regno < (80 - frame_pointer_needed))
2285 if (! is_emitted (LOC_REG (regno++)))
2287 current_frame_info.n_local_regs = regno;
2288 return LOC_REG (regno - 1);
2292 /* Failed to find a general register to spill to. Must use stack. */
2296 /* In order to make for nice schedules, we try to allocate every temporary
2297 to a different register. We must of course stay away from call-saved,
2298 fixed, and global registers. We must also stay away from registers
2299 allocated in current_frame_info.gr_used_mask, since those include regs
2300 used all through the prologue.
2302 Any register allocated here must be used immediately. The idea is to
2303 aid scheduling, not to solve data flow problems. */
2305 static int last_scratch_gr_reg;
2308 next_scratch_gr_reg (void)
2312 for (i = 0; i < 32; ++i)
2314 regno = (last_scratch_gr_reg + i + 1) & 31;
2315 if (call_used_regs[regno]
2316 && ! fixed_regs[regno]
2317 && ! global_regs[regno]
2318 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2320 last_scratch_gr_reg = regno;
2325 /* There must be _something_ available. */
2329 /* Helper function for ia64_compute_frame_size, called through
2330 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2333 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2335 unsigned int regno = REGNO (reg);
2338 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2339 for (i = 0; i < n; ++i)
2340 current_frame_info.gr_used_mask |= 1 << (regno + i);
2345 /* Returns the number of bytes offset between the frame pointer and the stack
2346 pointer for the current function. SIZE is the number of bytes of space
2347 needed for local variables. */
2350 ia64_compute_frame_size (HOST_WIDE_INT size)
2352 HOST_WIDE_INT total_size;
2353 HOST_WIDE_INT spill_size = 0;
2354 HOST_WIDE_INT extra_spill_size = 0;
2355 HOST_WIDE_INT pretend_args_size;
2358 int spilled_gr_p = 0;
2359 int spilled_fr_p = 0;
2365 if (current_frame_info.initialized)
2368 memset (&current_frame_info, 0, sizeof current_frame_info);
2369 CLEAR_HARD_REG_SET (mask);
2371 /* Don't allocate scratches to the return register. */
2372 diddle_return_value (mark_reg_gr_used_mask, NULL);
2374 /* Don't allocate scratches to the EH scratch registers. */
2375 if (cfun->machine->ia64_eh_epilogue_sp)
2376 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2377 if (cfun->machine->ia64_eh_epilogue_bsp)
2378 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2380 /* Find the size of the register stack frame. We have only 80 local
2381 registers, because we reserve 8 for the inputs and 8 for the
2384 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2385 since we'll be adjusting that down later. */
2386 regno = LOC_REG (78) + ! frame_pointer_needed;
2387 for (; regno >= LOC_REG (0); regno--)
2388 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2390 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2392 /* For functions marked with the syscall_linkage attribute, we must mark
2393 all eight input registers as in use, so that locals aren't visible to
2396 if (cfun->machine->n_varargs > 0
2397 || lookup_attribute ("syscall_linkage",
2398 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2399 current_frame_info.n_input_regs = 8;
2402 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2403 if (df_regs_ever_live_p (regno))
2405 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2408 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2409 if (df_regs_ever_live_p (regno))
2411 i = regno - OUT_REG (0) + 1;
2413 #ifndef PROFILE_HOOK
2414 /* When -p profiling, we need one output register for the mcount argument.
2415 Likewise for -a profiling for the bb_init_func argument. For -ax
2416 profiling, we need two output registers for the two bb_init_trace_func
2421 current_frame_info.n_output_regs = i;
2423 /* ??? No rotating register support yet. */
2424 current_frame_info.n_rotate_regs = 0;
2426 /* Discover which registers need spilling, and how much room that
2427 will take. Begin with floating point and general registers,
2428 which will always wind up on the stack. */
2430 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2431 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2433 SET_HARD_REG_BIT (mask, regno);
2439 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2440 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2442 SET_HARD_REG_BIT (mask, regno);
2448 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2449 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2451 SET_HARD_REG_BIT (mask, regno);
2456 /* Now come all special registers that might get saved in other
2457 general registers. */
2459 if (frame_pointer_needed)
2461 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2462 /* If we did not get a register, then we take LOC79. This is guaranteed
2463 to be free, even if regs_ever_live is already set, because this is
2464 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2465 as we don't count loc79 above. */
2466 if (current_frame_info.r[reg_fp] == 0)
2468 current_frame_info.r[reg_fp] = LOC_REG (79);
2469 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2473 if (! current_function_is_leaf)
2475 /* Emit a save of BR0 if we call other functions. Do this even
2476 if this function doesn't return, as EH depends on this to be
2477 able to unwind the stack. */
2478 SET_HARD_REG_BIT (mask, BR_REG (0));
2480 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2481 if (current_frame_info.r[reg_save_b0] == 0)
2483 extra_spill_size += 8;
2487 /* Similarly for ar.pfs. */
2488 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2489 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2490 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2492 extra_spill_size += 8;
2496 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2497 registers are clobbered, so we fall back to the stack. */
2498 current_frame_info.r[reg_save_gp]
2499 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2500 if (current_frame_info.r[reg_save_gp] == 0)
2502 SET_HARD_REG_BIT (mask, GR_REG (1));
2509 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2511 SET_HARD_REG_BIT (mask, BR_REG (0));
2512 extra_spill_size += 8;
2516 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2518 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2519 current_frame_info.r[reg_save_ar_pfs]
2520 = find_gr_spill (reg_save_ar_pfs, 1);
2521 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2523 extra_spill_size += 8;
2529 /* Unwind descriptor hackery: things are most efficient if we allocate
2530 consecutive GR save registers for RP, PFS, FP in that order. However,
2531 it is absolutely critical that FP get the only hard register that's
2532 guaranteed to be free, so we allocated it first. If all three did
2533 happen to be allocated hard regs, and are consecutive, rearrange them
2534 into the preferred order now.
2536 If we have already emitted code for any of those registers,
2537 then it's already too late to change. */
2538 min_regno = MIN (current_frame_info.r[reg_fp],
2539 MIN (current_frame_info.r[reg_save_b0],
2540 current_frame_info.r[reg_save_ar_pfs]));
2541 max_regno = MAX (current_frame_info.r[reg_fp],
2542 MAX (current_frame_info.r[reg_save_b0],
2543 current_frame_info.r[reg_save_ar_pfs]));
2545 && min_regno + 2 == max_regno
2546 && (current_frame_info.r[reg_fp] == min_regno + 1
2547 || current_frame_info.r[reg_save_b0] == min_regno + 1
2548 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2549 && (emitted_frame_related_regs[reg_save_b0] == 0
2550 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2551 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2552 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2553 && (emitted_frame_related_regs[reg_fp] == 0
2554 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2556 current_frame_info.r[reg_save_b0] = min_regno;
2557 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2558 current_frame_info.r[reg_fp] = min_regno + 2;
2561 /* See if we need to store the predicate register block. */
2562 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2563 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2565 if (regno <= PR_REG (63))
2567 SET_HARD_REG_BIT (mask, PR_REG (0));
2568 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2569 if (current_frame_info.r[reg_save_pr] == 0)
2571 extra_spill_size += 8;
2575 /* ??? Mark them all as used so that register renaming and such
2576 are free to use them. */
2577 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2578 df_set_regs_ever_live (regno, true);
2581 /* If we're forced to use st8.spill, we're forced to save and restore
2582 ar.unat as well. The check for existing liveness allows inline asm
2583 to touch ar.unat. */
2584 if (spilled_gr_p || cfun->machine->n_varargs
2585 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2587 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2588 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2589 current_frame_info.r[reg_save_ar_unat]
2590 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2591 if (current_frame_info.r[reg_save_ar_unat] == 0)
2593 extra_spill_size += 8;
2598 if (df_regs_ever_live_p (AR_LC_REGNUM))
2600 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2601 current_frame_info.r[reg_save_ar_lc]
2602 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2603 if (current_frame_info.r[reg_save_ar_lc] == 0)
2605 extra_spill_size += 8;
2610 /* If we have an odd number of words of pretend arguments written to
2611 the stack, then the FR save area will be unaligned. We round the
2612 size of this area up to keep things 16 byte aligned. */
2614 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2616 pretend_args_size = crtl->args.pretend_args_size;
2618 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2619 + crtl->outgoing_args_size);
2620 total_size = IA64_STACK_ALIGN (total_size);
2622 /* We always use the 16-byte scratch area provided by the caller, but
2623 if we are a leaf function, there's no one to which we need to provide
2625 if (current_function_is_leaf)
2626 total_size = MAX (0, total_size - 16);
2628 current_frame_info.total_size = total_size;
2629 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2630 current_frame_info.spill_size = spill_size;
2631 current_frame_info.extra_spill_size = extra_spill_size;
2632 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2633 current_frame_info.n_spilled = n_spilled;
2634 current_frame_info.initialized = reload_completed;
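/* Worked example (illustrative numbers, not from the original source):
   a non-leaf function with 40 bytes of locals that spills two call-saved
   FRs (2 * 16 = 32 bytes of spill_size), saves b0 to memory
   (extra_spill_size = 8), has no pretend args and 16 bytes of outgoing
   args gets

       total_size = IA64_STACK_ALIGN (32 + 8 + 40 + 0 + 16) = 96

   and spill_cfa_off = 0 - 16 = -16.  A leaf function would additionally
   drop the caller-provided scratch area via MAX (0, total_size - 16).  */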
2637 /* Compute the initial difference between the specified pair of registers. */
2640 ia64_initial_elimination_offset (int from, int to)
2642 HOST_WIDE_INT offset;
2644 ia64_compute_frame_size (get_frame_size ());
2647 case FRAME_POINTER_REGNUM:
2650 case HARD_FRAME_POINTER_REGNUM:
2651 if (current_function_is_leaf)
2652 offset = -current_frame_info.total_size;
2654 offset = -(current_frame_info.total_size
2655 - crtl->outgoing_args_size - 16);
2658 case STACK_POINTER_REGNUM:
2659 if (current_function_is_leaf)
2662 offset = 16 + crtl->outgoing_args_size;
2670 case ARG_POINTER_REGNUM:
2671 /* Arguments start above the 16 byte save area, unless stdarg
2672 in which case we store through the 16 byte save area. */
2675 case HARD_FRAME_POINTER_REGNUM:
2676 offset = 16 - crtl->args.pretend_args_size;
2679 case STACK_POINTER_REGNUM:
2680 offset = (current_frame_info.total_size
2681 + 16 - crtl->args.pretend_args_size);
2696 /* If there are more than a trivial number of register spills, we use
2697 two interleaved iterators so that we can get two memory references
2700 In order to simplify things in the prologue and epilogue expanders,
2701 we use helper functions to fix up the memory references after the
2702 fact with the appropriate offsets to a POST_MODIFY memory mode.
2703 The following data structure tracks the state of the two iterators
2704 while insns are being emitted. */
2706 struct spill_fill_data
2708 rtx init_after; /* point at which to emit initializations */
2709 rtx init_reg[2]; /* initial base register */
2710 rtx iter_reg[2]; /* the iterator registers */
2711 rtx *prev_addr[2]; /* address of last memory use */
2712 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2713 HOST_WIDE_INT prev_off[2]; /* last offset */
2714 int n_iter; /* number of iterators in use */
2715 int next_iter; /* next iterator to use */
2716 unsigned int save_gr_used_mask;
2719 static struct spill_fill_data spill_fill_data;
2722 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2726 spill_fill_data.init_after = get_last_insn ();
2727 spill_fill_data.init_reg[0] = init_reg;
2728 spill_fill_data.init_reg[1] = init_reg;
2729 spill_fill_data.prev_addr[0] = NULL;
2730 spill_fill_data.prev_addr[1] = NULL;
2731 spill_fill_data.prev_insn[0] = NULL;
2732 spill_fill_data.prev_insn[1] = NULL;
2733 spill_fill_data.prev_off[0] = cfa_off;
2734 spill_fill_data.prev_off[1] = cfa_off;
2735 spill_fill_data.next_iter = 0;
2736 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2738 spill_fill_data.n_iter = 1 + (n_spills > 2);
2739 for (i = 0; i < spill_fill_data.n_iter; ++i)
2741 int regno = next_scratch_gr_reg ();
2742 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2743 current_frame_info.gr_used_mask |= 1 << regno;
2748 finish_spill_pointers (void)
2750 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2754 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2756 int iter = spill_fill_data.next_iter;
2757 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2758 rtx disp_rtx = GEN_INT (disp);
2761 if (spill_fill_data.prev_addr[iter])
2763 if (satisfies_constraint_N (disp_rtx))
2765 *spill_fill_data.prev_addr[iter]
2766 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2767 gen_rtx_PLUS (DImode,
2768 spill_fill_data.iter_reg[iter],
2770 add_reg_note (spill_fill_data.prev_insn[iter],
2771 REG_INC, spill_fill_data.iter_reg[iter]);
2775 /* ??? Could use register post_modify for loads. */
2776 if (!satisfies_constraint_I (disp_rtx))
2778 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2779 emit_move_insn (tmp, disp_rtx);
2782 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2783 spill_fill_data.iter_reg[iter], disp_rtx));
2786 /* Micro-optimization: if we've created a frame pointer, it's at
2787 CFA 0, which may allow the real iterator to be initialized lower,
2788 slightly increasing parallelism. Also, if there are few saves
2789 it may eliminate the iterator entirely. */
2791 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2792 && frame_pointer_needed)
2794 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2795 set_mem_alias_set (mem, get_varargs_alias_set ());
2803 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2804 spill_fill_data.init_reg[iter]);
2809 if (!satisfies_constraint_I (disp_rtx))
2811 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2812 emit_move_insn (tmp, disp_rtx);
2816 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2817 spill_fill_data.init_reg[iter],
2824 /* Careful for being the first insn in a sequence. */
2825 if (spill_fill_data.init_after)
2826 insn = emit_insn_after (seq, spill_fill_data.init_after);
2829 rtx first = get_insns ();
2831 insn = emit_insn_before (seq, first);
2833 insn = emit_insn (seq);
2835 spill_fill_data.init_after = insn;
2838 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2840 /* ??? Not all of the spills are for varargs, but some of them are.
2841 The rest of the spills belong in an alias set of their own. But
2842 it doesn't actually hurt to include them here. */
2843 set_mem_alias_set (mem, get_varargs_alias_set ());
2845 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2846 spill_fill_data.prev_off[iter] = cfa_off;
2848 if (++iter >= spill_fill_data.n_iter)
2850 spill_fill_data.next_iter = iter;
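/* Illustration only: with two iterators the GR/FR saves in the prologue
   come out as two independent post-modify chains, e.g.

        st8.spill [r16] = r32, 16      // iterator 0
        st8.spill [r17] = r33, 16      // iterator 1
        st8.spill [r16] = r34, 16
        st8.spill [r17] = r35, 16

   so consecutive saves have no address dependence and can share an
   instruction group.  Register numbers and displacements are made up;
   the real displacements are patched in after the fact as described
   above.  */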
2856 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2859 int iter = spill_fill_data.next_iter;
2862 mem = spill_restore_mem (reg, cfa_off);
2863 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2864 spill_fill_data.prev_insn[iter] = insn;
2871 RTX_FRAME_RELATED_P (insn) = 1;
2873 /* Don't even pretend that the unwind code can intuit its way
2874 through a pair of interleaved post_modify iterators. Just
2875 provide the correct answer. */
2877 if (frame_pointer_needed)
2879 base = hard_frame_pointer_rtx;
2884 base = stack_pointer_rtx;
2885 off = current_frame_info.total_size - cfa_off;
2888 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2889 gen_rtx_SET (VOIDmode,
2890 gen_rtx_MEM (GET_MODE (reg),
2891 plus_constant (base, off)),
2897 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2899 int iter = spill_fill_data.next_iter;
2902 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2903 GEN_INT (cfa_off)));
2904 spill_fill_data.prev_insn[iter] = insn;
2907 /* Wrapper functions that discard the CONST_INT spill offset. These
2908 exist so that we can give gr_spill/gr_fill the offset they need and
2909 use a consistent function interface. */
2912 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2914 return gen_movdi (dest, src);
2918 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2920 return gen_fr_spill (dest, src);
2924 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2926 return gen_fr_restore (dest, src);
2929 /* Called after register allocation to add any instructions needed for the
2930 prologue. Using a prologue insn is favored compared to putting all of the
2931 instructions in output_function_prologue(), since it allows the scheduler
2932 to intermix instructions with the saves of the caller saved registers. In
2933 some cases, it might be necessary to emit a barrier instruction as the last
2934 insn to prevent such scheduling.
2936 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2937 so that the debug info generation code can handle them properly.
2939 The register save area is laid out like so:
2941 [ varargs spill area ]
2942 [ fr register spill area ]
2943 [ br register spill area ]
2944 [ ar register spill area ]
2945 [ pr register spill area ]
2946 [ gr register spill area ] */
2948 /* ??? Get inefficient code when the frame size is larger than can fit in an
2949 adds instruction. */
2952 ia64_expand_prologue (void)
2954 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2955 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2958 ia64_compute_frame_size (get_frame_size ());
2959 last_scratch_gr_reg = 15;
2963 fprintf (dump_file, "ia64 frame related registers "
2964 "recorded in current_frame_info.r[]:\n");
2965 #define PRINTREG(a) if (current_frame_info.r[a]) \
2966 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
2968 PRINTREG(reg_save_b0);
2969 PRINTREG(reg_save_pr);
2970 PRINTREG(reg_save_ar_pfs);
2971 PRINTREG(reg_save_ar_unat);
2972 PRINTREG(reg_save_ar_lc);
2973 PRINTREG(reg_save_gp);
2977 /* If there is no epilogue, then we don't need some prologue insns.
2978 We need to avoid emitting the dead prologue insns, because flow
2979 will complain about them. */
2985 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2986 if ((e->flags & EDGE_FAKE) == 0
2987 && (e->flags & EDGE_FALLTHRU) != 0)
2989 epilogue_p = (e != NULL);
2994 /* Set the local, input, and output register names. We need to do this
2995 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2996 half. If we use in/loc/out register names, then we get assembler errors
2997 in crtn.S because there is no alloc insn or regstk directive in there. */
2998 if (! TARGET_REG_NAMES)
3000 int inputs = current_frame_info.n_input_regs;
3001 int locals = current_frame_info.n_local_regs;
3002 int outputs = current_frame_info.n_output_regs;
3004 for (i = 0; i < inputs; i++)
3005 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3006 for (i = 0; i < locals; i++)
3007 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3008 for (i = 0; i < outputs; i++)
3009 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3012 /* Set the frame pointer register name. The regnum is logically loc79,
3013 but of course we'll not have allocated that many locals. Rather than
3014 worrying about renumbering the existing rtxs, we adjust the name. */
3015 /* ??? This code means that we can never use one local register when
3016 there is a frame pointer. loc79 gets wasted in this case, as it is
3017 renamed to a register that will never be used. See also the try_locals
3018 code in find_gr_spill. */
3019 if (current_frame_info.r[reg_fp])
3021 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3022 reg_names[HARD_FRAME_POINTER_REGNUM]
3023 = reg_names[current_frame_info.r[reg_fp]];
3024 reg_names[current_frame_info.r[reg_fp]] = tmp;
3027 /* We don't need an alloc instruction if we've used no outputs or locals. */
3028 if (current_frame_info.n_local_regs == 0
3029 && current_frame_info.n_output_regs == 0
3030 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3031 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3033 /* If there is no alloc, but there are input registers used, then we
3034 need a .regstk directive. */
3035 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3036 ar_pfs_save_reg = NULL_RTX;
3040 current_frame_info.need_regstk = 0;
3042 if (current_frame_info.r[reg_save_ar_pfs])
3044 regno = current_frame_info.r[reg_save_ar_pfs];
3045 reg_emitted (reg_save_ar_pfs);
3048 regno = next_scratch_gr_reg ();
3049 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3051 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3052 GEN_INT (current_frame_info.n_input_regs),
3053 GEN_INT (current_frame_info.n_local_regs),
3054 GEN_INT (current_frame_info.n_output_regs),
3055 GEN_INT (current_frame_info.n_rotate_regs)));
3056 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
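/* Illustration only: for a function with 2 inputs, 3 locals and 4 outputs
   the insn emitted above assembles to something like

        alloc r34 = ar.pfs, 2, 3, 4, 0

   where r34 stands for whatever GR was chosen to hold the old ar.pfs.  */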
3059 /* Set up frame pointer, stack pointer, and spill iterators. */
3061 n_varargs = cfun->machine->n_varargs;
3062 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3063 stack_pointer_rtx, 0);
3065 if (frame_pointer_needed)
3067 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3068 RTX_FRAME_RELATED_P (insn) = 1;
3071 if (current_frame_info.total_size != 0)
3073 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3076 if (satisfies_constraint_I (frame_size_rtx))
3077 offset = frame_size_rtx;
3080 regno = next_scratch_gr_reg ();
3081 offset = gen_rtx_REG (DImode, regno);
3082 emit_move_insn (offset, frame_size_rtx);
3085 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3086 stack_pointer_rtx, offset));
3088 if (! frame_pointer_needed)
3090 RTX_FRAME_RELATED_P (insn) = 1;
3091 if (GET_CODE (offset) != CONST_INT)
3092 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3093 gen_rtx_SET (VOIDmode,
3095 gen_rtx_PLUS (DImode,
3100 /* ??? At this point we must generate a magic insn that appears to
3101 modify the stack pointer, the frame pointer, and all spill
3102 iterators. This would allow the most scheduling freedom. For
3103 now, just hard stop. */
3104 emit_insn (gen_blockage ());
3107 /* Must copy out ar.unat before doing any integer spills. */
3108 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3110 if (current_frame_info.r[reg_save_ar_unat])
3113 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3114 reg_emitted (reg_save_ar_unat);
3118 alt_regno = next_scratch_gr_reg ();
3119 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3120 current_frame_info.gr_used_mask |= 1 << alt_regno;
3123 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3124 insn = emit_move_insn (ar_unat_save_reg, reg);
3125 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3127 /* Even if we're not going to generate an epilogue, we still
3128 need to save the register so that EH works. */
3129 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3130 emit_insn (gen_prologue_use (ar_unat_save_reg));
3133 ar_unat_save_reg = NULL_RTX;
3135 /* Spill all varargs registers. Do this before spilling any GR registers,
3136 since we want the UNAT bits for the GR registers to override the UNAT
3137 bits from varargs, which we don't care about. */
3140 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3142 reg = gen_rtx_REG (DImode, regno);
3143 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3146 /* Locate the bottom of the register save area. */
3147 cfa_off = (current_frame_info.spill_cfa_off
3148 + current_frame_info.spill_size
3149 + current_frame_info.extra_spill_size);
3151 /* Save the predicate register block either in a register or in memory. */
3152 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3154 reg = gen_rtx_REG (DImode, PR_REG (0));
3155 if (current_frame_info.r[reg_save_pr] != 0)
3157 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3158 reg_emitted (reg_save_pr);
3159 insn = emit_move_insn (alt_reg, reg);
3161 /* ??? Denote pr spill/fill by a DImode move that modifies all
3162 64 hard registers. */
3163 RTX_FRAME_RELATED_P (insn) = 1;
3164 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3165 gen_rtx_SET (VOIDmode, alt_reg, reg));
3167 /* Even if we're not going to generate an epilogue, we still
3168 need to save the register so that EH works. */
3170 emit_insn (gen_prologue_use (alt_reg));
3174 alt_regno = next_scratch_gr_reg ();
3175 alt_reg = gen_rtx_REG (DImode, alt_regno);
3176 insn = emit_move_insn (alt_reg, reg);
3177 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3182 /* Handle AR regs in numerical order. All of them get special handling. */
3183 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3184 && current_frame_info.r[reg_save_ar_unat] == 0)
3186 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3187 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3191 /* The alloc insn already copied ar.pfs into a general register. The
3192 only thing we have to do now is copy that register to a stack slot
3193 if we'd not allocated a local register for the job. */
3194 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3195 && current_frame_info.r[reg_save_ar_pfs] == 0)
3197 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3198 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3202 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3204 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3205 if (current_frame_info.r[reg_save_ar_lc] != 0)
3207 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3208 reg_emitted (reg_save_ar_lc);
3209 insn = emit_move_insn (alt_reg, reg);
3210 RTX_FRAME_RELATED_P (insn) = 1;
3212 /* Even if we're not going to generate an epilogue, we still
3213 need to save the register so that EH works. */
3215 emit_insn (gen_prologue_use (alt_reg));
3219 alt_regno = next_scratch_gr_reg ();
3220 alt_reg = gen_rtx_REG (DImode, alt_regno);
3221 emit_move_insn (alt_reg, reg);
3222 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3227 /* Save the return pointer. */
3228 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3230 reg = gen_rtx_REG (DImode, BR_REG (0));
3231 if (current_frame_info.r[reg_save_b0] != 0)
3233 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3234 reg_emitted (reg_save_b0);
3235 insn = emit_move_insn (alt_reg, reg);
3236 RTX_FRAME_RELATED_P (insn) = 1;
3238 /* Even if we're not going to generate an epilogue, we still
3239 need to save the register so that EH works. */
3241 emit_insn (gen_prologue_use (alt_reg));
3245 alt_regno = next_scratch_gr_reg ();
3246 alt_reg = gen_rtx_REG (DImode, alt_regno);
3247 emit_move_insn (alt_reg, reg);
3248 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3253 if (current_frame_info.r[reg_save_gp])
3255 reg_emitted (reg_save_gp);
3256 insn = emit_move_insn (gen_rtx_REG (DImode,
3257 current_frame_info.r[reg_save_gp]),
3258 pic_offset_table_rtx);
3261 /* We should now be at the base of the gr/br/fr spill area. */
3262 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3263 + current_frame_info.spill_size));
3265 /* Spill all general registers. */
3266 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3267 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3269 reg = gen_rtx_REG (DImode, regno);
3270 do_spill (gen_gr_spill, reg, cfa_off, reg);
3274 /* Spill the rest of the BR registers. */
3275 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3276 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3278 alt_regno = next_scratch_gr_reg ();
3279 alt_reg = gen_rtx_REG (DImode, alt_regno);
3280 reg = gen_rtx_REG (DImode, regno);
3281 emit_move_insn (alt_reg, reg);
3282 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3286 /* Align the frame and spill all FR registers. */
3287 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3288 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3290 gcc_assert (!(cfa_off & 15));
3291 reg = gen_rtx_REG (XFmode, regno);
3292 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3296 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3298 finish_spill_pointers ();
3301 /* Called after register allocation to add any instructions needed for the
3302 epilogue. Using an epilogue insn is favored compared to putting all of the
3303 instructions in output_function_epilogue(), since it allows the scheduler
3304 to intermix instructions with the restores of the caller saved registers. In
3305 some cases, it might be necessary to emit a barrier instruction as the last
3306 insn to prevent such scheduling. */
3309 ia64_expand_epilogue (int sibcall_p)
3311 rtx insn, reg, alt_reg, ar_unat_save_reg;
3312 int regno, alt_regno, cfa_off;
3314 ia64_compute_frame_size (get_frame_size ());
3316 /* If there is a frame pointer, then we use it instead of the stack
3317 pointer, so that the stack pointer does not need to be valid when
3318 the epilogue starts. See EXIT_IGNORE_STACK. */
3319 if (frame_pointer_needed)
3320 setup_spill_pointers (current_frame_info.n_spilled,
3321 hard_frame_pointer_rtx, 0);
3323 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3324 current_frame_info.total_size);
3326 if (current_frame_info.total_size != 0)
3328 /* ??? At this point we must generate a magic insn that appears to
3329 modify the spill iterators and the frame pointer. This would
3330 allow the most scheduling freedom. For now, just hard stop. */
3331 emit_insn (gen_blockage ());
3334 /* Locate the bottom of the register save area. */
3335 cfa_off = (current_frame_info.spill_cfa_off
3336 + current_frame_info.spill_size
3337 + current_frame_info.extra_spill_size);
3339 /* Restore the predicate registers. */
3340 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3342 if (current_frame_info.r[reg_save_pr] != 0)
3344 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3345 reg_emitted (reg_save_pr);
3349 alt_regno = next_scratch_gr_reg ();
3350 alt_reg = gen_rtx_REG (DImode, alt_regno);
3351 do_restore (gen_movdi_x, alt_reg, cfa_off);
3354 reg = gen_rtx_REG (DImode, PR_REG (0));
3355 emit_move_insn (reg, alt_reg);
3358 /* Restore the application registers. */
3360 /* Load the saved unat from the stack, but do not restore it until
3361 after the GRs have been restored. */
3362 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3364 if (current_frame_info.r[reg_save_ar_unat] != 0)
3367 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3368 reg_emitted (reg_save_ar_unat);
3372 alt_regno = next_scratch_gr_reg ();
3373 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3374 current_frame_info.gr_used_mask |= 1 << alt_regno;
3375 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3380 ar_unat_save_reg = NULL_RTX;
3382 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3384 reg_emitted (reg_save_ar_pfs);
3385 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3386 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3387 emit_move_insn (reg, alt_reg);
3389 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3391 alt_regno = next_scratch_gr_reg ();
3392 alt_reg = gen_rtx_REG (DImode, alt_regno);
3393 do_restore (gen_movdi_x, alt_reg, cfa_off);
3395 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3396 emit_move_insn (reg, alt_reg);
3399 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3401 if (current_frame_info.r[reg_save_ar_lc] != 0)
3403 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3404 reg_emitted (reg_save_ar_lc);
3408 alt_regno = next_scratch_gr_reg ();
3409 alt_reg = gen_rtx_REG (DImode, alt_regno);
3410 do_restore (gen_movdi_x, alt_reg, cfa_off);
3413 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3414 emit_move_insn (reg, alt_reg);
3417 /* Restore the return pointer. */
3418 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3420 if (current_frame_info.r[reg_save_b0] != 0)
3422 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3423 reg_emitted (reg_save_b0);
3427 alt_regno = next_scratch_gr_reg ();
3428 alt_reg = gen_rtx_REG (DImode, alt_regno);
3429 do_restore (gen_movdi_x, alt_reg, cfa_off);
3432 reg = gen_rtx_REG (DImode, BR_REG (0));
3433 emit_move_insn (reg, alt_reg);
3436 /* We should now be at the base of the gr/br/fr spill area. */
3437 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3438 + current_frame_info.spill_size));
3440 /* The GP may be stored on the stack in the prologue, but it's
3441 never restored in the epilogue. Skip the stack slot. */
3442 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3445 /* Restore all general registers. */
3446 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3447 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3449 reg = gen_rtx_REG (DImode, regno);
3450 do_restore (gen_gr_restore, reg, cfa_off);
3454 /* Restore the branch registers. */
3455 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3456 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3458 alt_regno = next_scratch_gr_reg ();
3459 alt_reg = gen_rtx_REG (DImode, alt_regno);
3460 do_restore (gen_movdi_x, alt_reg, cfa_off);
3462 reg = gen_rtx_REG (DImode, regno);
3463 emit_move_insn (reg, alt_reg);
3466 /* Restore floating point registers. */
3467 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3468 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3470 gcc_assert (!(cfa_off & 15));
3471 reg = gen_rtx_REG (XFmode, regno);
3472 do_restore (gen_fr_restore_x, reg, cfa_off);
3476 /* Restore ar.unat for real. */
3477 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3479 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3480 emit_move_insn (reg, ar_unat_save_reg);
3483 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3485 finish_spill_pointers ();
3487 if (current_frame_info.total_size
3488 || cfun->machine->ia64_eh_epilogue_sp
3489 || frame_pointer_needed)
3491 /* ??? At this point we must generate a magic insn that appears to
3492 modify the spill iterators, the stack pointer, and the frame
3493 pointer. This would allow the most scheduling freedom. For now,
3495 emit_insn (gen_blockage ());
3498 if (cfun->machine->ia64_eh_epilogue_sp)
3499 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3500 else if (frame_pointer_needed)
3502 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3503 RTX_FRAME_RELATED_P (insn) = 1;
3505 else if (current_frame_info.total_size)
3507 rtx offset, frame_size_rtx;
3509 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3510 if (satisfies_constraint_I (frame_size_rtx))
3511 offset = frame_size_rtx;
3514 regno = next_scratch_gr_reg ();
3515 offset = gen_rtx_REG (DImode, regno);
3516 emit_move_insn (offset, frame_size_rtx);
3519 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3522 RTX_FRAME_RELATED_P (insn) = 1;
3523 if (GET_CODE (offset) != CONST_INT)
3524 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3525 gen_rtx_SET (VOIDmode,
3527 gen_rtx_PLUS (DImode,
3532 if (cfun->machine->ia64_eh_epilogue_bsp)
3533 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3536 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3539 int fp = GR_REG (2);
3540 We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3541 first available call-clobbered register. If there was a frame pointer
3542 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3543 so we have to make sure we're using the string "r2" when emitting
3544 the register name for the assembler. */
3545 if (current_frame_info.r[reg_fp]
3546 && current_frame_info.r[reg_fp] == GR_REG (2))
3547 fp = HARD_FRAME_POINTER_REGNUM;
3549 /* We must emit an alloc to force the input registers to become output
3550 registers. Otherwise, if the callee tries to pass its parameters
3551 through to another call without an intervening alloc, then these
3553 /* ??? We don't need to preserve all input registers. We only need to
3554 preserve those input registers used as arguments to the sibling call.
3555 It is unclear how to compute that number here. */
3556 if (current_frame_info.n_input_regs != 0)
3558 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3559 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3560 const0_rtx, const0_rtx,
3561 n_inputs, const0_rtx));
3562 RTX_FRAME_RELATED_P (insn) = 1;
3567 /* Return 1 if br.ret can do all the work required to return from a
3571 ia64_direct_return (void)
3573 if (reload_completed && ! frame_pointer_needed)
3575 ia64_compute_frame_size (get_frame_size ());
3577 return (current_frame_info.total_size == 0
3578 && current_frame_info.n_spilled == 0
3579 && current_frame_info.r[reg_save_b0] == 0
3580 && current_frame_info.r[reg_save_pr] == 0
3581 && current_frame_info.r[reg_save_ar_pfs] == 0
3582 && current_frame_info.r[reg_save_ar_unat] == 0
3583 && current_frame_info.r[reg_save_ar_lc] == 0);
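/* Illustration only: a function needing no frame at all, e.g.

     int add (int a, int b) { return a + b; }

   satisfies every test above and can return with nothing more than
   "br.ret.sptk.many b0".  */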
3588 /* Return the magic cookie that we use to hold the return address
3589 during early compilation. */
3592 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3596 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3599 /* Split this value after reload, now that we know where the return
3600 address is saved. */
3603 ia64_split_return_addr_rtx (rtx dest)
3607 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3609 if (current_frame_info.r[reg_save_b0] != 0)
3611 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3612 reg_emitted (reg_save_b0);
3620 /* Compute offset from CFA for BR0. */
3621 /* ??? Must be kept in sync with ia64_expand_prologue. */
3622 off = (current_frame_info.spill_cfa_off
3623 + current_frame_info.spill_size);
3624 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3625 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3628 /* Convert CFA offset to a register based offset. */
3629 if (frame_pointer_needed)
3630 src = hard_frame_pointer_rtx;
3633 src = stack_pointer_rtx;
3634 off += current_frame_info.total_size;
3637 /* Load address into scratch register. */
3638 off_r = GEN_INT (off);
3639 if (satisfies_constraint_I (off_r))
3640 emit_insn (gen_adddi3 (dest, src, off_r));
3643 emit_move_insn (dest, off_r);
3644 emit_insn (gen_adddi3 (dest, src, dest));
3647 src = gen_rtx_MEM (Pmode, dest);
3651 src = gen_rtx_REG (DImode, BR_REG (0));
3653 emit_move_insn (dest, src);
3657 ia64_hard_regno_rename_ok (int from, int to)
3659 /* Don't clobber any of the registers we reserved for the prologue. */
3660 enum ia64_frame_regs r;
3662 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3663 if (to == current_frame_info.r[r]
3664 || from == current_frame_info.r[r]
3665 || to == emitted_frame_related_regs[r]
3666 || from == emitted_frame_related_regs[r])
3669 /* Don't use output registers outside the register frame. */
3670 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3673 /* Retain even/oddness on predicate register pairs. */
3674 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3675 return (from & 1) == (to & 1);
3680 /* Target hook for assembling integer objects. Handle word-sized
3681 aligned objects and detect the cases when @fptr is needed. */
3684 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3686 if (size == POINTER_SIZE / BITS_PER_UNIT
3687 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3688 && GET_CODE (x) == SYMBOL_REF
3689 && SYMBOL_REF_FUNCTION_P (x))
3691 static const char * const directive[2][2] = {
3692 /* 64-bit pointer */ /* 32-bit pointer */
3693 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3694 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3696 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3697 output_addr_const (asm_out_file, x);
3698 fputs (")\n", asm_out_file);
3701 return default_assemble_integer (x, size, aligned_p);
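/* For example (illustration only): a static initializer that takes the
   address of a function,

     extern void foo (void);
     void (*fp) (void) = foo;

   goes through the branch above and is emitted with the data8 @fptr(...)
   directive (data4 for 32-bit pointers), so the linker materializes an
   official function descriptor rather than storing the raw code
   address.  */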
3704 /* Emit the function prologue. */
3707 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3709 int mask, grsave, grsave_prev;
3711 if (current_frame_info.need_regstk)
3712 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3713 current_frame_info.n_input_regs,
3714 current_frame_info.n_local_regs,
3715 current_frame_info.n_output_regs,
3716 current_frame_info.n_rotate_regs);
3718 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3721 /* Emit the .prologue directive. */
3724 grsave = grsave_prev = 0;
3725 if (current_frame_info.r[reg_save_b0] != 0)
3728 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3730 if (current_frame_info.r[reg_save_ar_pfs] != 0
3731 && (grsave_prev == 0
3732 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3735 if (grsave_prev == 0)
3736 grsave = current_frame_info.r[reg_save_ar_pfs];
3737 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3739 if (current_frame_info.r[reg_fp] != 0
3740 && (grsave_prev == 0
3741 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3744 if (grsave_prev == 0)
3745 grsave = HARD_FRAME_POINTER_REGNUM;
3746 grsave_prev = current_frame_info.r[reg_fp];
3748 if (current_frame_info.r[reg_save_pr] != 0
3749 && (grsave_prev == 0
3750 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3753 if (grsave_prev == 0)
3754 grsave = current_frame_info.r[reg_save_pr];
3757 if (mask && TARGET_GNU_AS)
3758 fprintf (file, "\t.prologue %d, %d\n", mask,
3759 ia64_dbx_register_number (grsave));
3761 fputs ("\t.prologue\n", file);
3763 /* Emit a .spill directive, if necessary, to relocate the base of
3764 the register spill area. */
3765 if (current_frame_info.spill_cfa_off != -16)
3766 fprintf (file, "\t.spill %ld\n",
3767 (long) (current_frame_info.spill_cfa_off
3768 + current_frame_info.spill_size));
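/* Illustration only: when b0, ar.pfs and the frame pointer are all saved
   in consecutive GRs, the mask computed above is 8 | 4 | 2 = 14 and the
   directive comes out as

        .prologue 14, <grsave>

   where <grsave> is the first save GR after translation through
   ia64_dbx_register_number.  */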
3771 /* Emit the .body directive at the scheduled end of the prologue. */
3774 ia64_output_function_end_prologue (FILE *file)
3776 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3779 fputs ("\t.body\n", file);
3782 /* Emit the function epilogue. */
3785 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3786 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3790 if (current_frame_info.r[reg_fp])
3792 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3793 reg_names[HARD_FRAME_POINTER_REGNUM]
3794 = reg_names[current_frame_info.r[reg_fp]];
3795 reg_names[current_frame_info.r[reg_fp]] = tmp;
3796 reg_emitted (reg_fp);
3798 if (! TARGET_REG_NAMES)
3800 for (i = 0; i < current_frame_info.n_input_regs; i++)
3801 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3802 for (i = 0; i < current_frame_info.n_local_regs; i++)
3803 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3804 for (i = 0; i < current_frame_info.n_output_regs; i++)
3805 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3808 current_frame_info.initialized = 0;
3812 ia64_dbx_register_number (int regno)
3814 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3815 from its home at loc79 to something inside the register frame. We
3816 must perform the same renumbering here for the debug info. */
3817 if (current_frame_info.r[reg_fp])
3819 if (regno == HARD_FRAME_POINTER_REGNUM)
3820 regno = current_frame_info.r[reg_fp];
3821 else if (regno == current_frame_info.r[reg_fp])
3822 regno = HARD_FRAME_POINTER_REGNUM;
3825 if (IN_REGNO_P (regno))
3826 return 32 + regno - IN_REG (0);
3827 else if (LOC_REGNO_P (regno))
3828 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3829 else if (OUT_REGNO_P (regno))
3830 return (32 + current_frame_info.n_input_regs
3831 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3837 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3839 rtx addr_reg, eight = GEN_INT (8);
3841 /* The Intel assembler requires that the global __ia64_trampoline symbol
3842 be declared explicitly */
3845 static bool declared_ia64_trampoline = false;
3847 if (!declared_ia64_trampoline)
3849 declared_ia64_trampoline = true;
3850 (*targetm.asm_out.globalize_label) (asm_out_file,
3851 "__ia64_trampoline");
3855 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3856 addr = convert_memory_address (Pmode, addr);
3857 fnaddr = convert_memory_address (Pmode, fnaddr);
3858 static_chain = convert_memory_address (Pmode, static_chain);
3860 /* Load up our iterator. */
3861 addr_reg = gen_reg_rtx (Pmode);
3862 emit_move_insn (addr_reg, addr);
3864 /* The first two words are the fake descriptor:
3865 __ia64_trampoline, ADDR+16. */
3866 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3867 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3868 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3870 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3871 copy_to_reg (plus_constant (addr, 16)));
3872 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3874 /* The third word is the target descriptor. */
3875 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3876 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3878 /* The fourth word is the static chain. */
3879 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
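/* Illustration only: the trampoline written above occupies four 8-byte
   words starting at ADDR,

        ADDR+ 0:  __ia64_trampoline    \  fake descriptor whose "gp" slot
        ADDR+ 8:  ADDR+16              /  points at the data below
        ADDR+16:  FNADDR                  (address of the real descriptor)
        ADDR+24:  STATIC_CHAIN

   so a call through the trampoline enters __ia64_trampoline in libgcc,
   which picks up the real target and the static chain from the last two
   words.  */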
3882 /* Do any needed setup for a variadic function. CUM has not been updated
3883 for the last named argument which has type TYPE and mode MODE.
3885 We generate the actual spill instructions during prologue generation. */
3888 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3889 tree type, int * pretend_size,
3890 int second_time ATTRIBUTE_UNUSED)
3892 CUMULATIVE_ARGS next_cum = *cum;
3894 /* Skip the current argument. */
3895 ia64_function_arg_advance (&next_cum, mode, type, 1);
3897 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3899 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3900 *pretend_size = n * UNITS_PER_WORD;
3901 cfun->machine->n_varargs = n;
3905 /* Check whether TYPE is a homogeneous floating point aggregate. If
3906 it is, return the mode of the floating point type that appears
3907 in all leaves. If it is not, return VOIDmode.
3909 An aggregate is a homogeneous floating point aggregate if all
3910 fields/elements in it have the same floating point type (e.g.,
3911 SFmode). 128-bit quad-precision floats are excluded.
3913 Variable sized aggregates should never arrive here, since we should
3914 have already decided to pass them by reference. Top-level zero-sized
3915 aggregates are excluded because our parallels crash the middle-end. */
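/* For instance (illustration only):

     struct hfa   { float x, y, z; };       -- HFA, element mode SFmode
     struct mixed { float x; double y; };   -- mixed modes, not an HFA
     _Complex double z;                     -- HFA with element mode DFmode
     struct quad  { __float128 q; };        -- excluded, quad precision  */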
3917 static enum machine_mode
3918 hfa_element_mode (const_tree type, bool nested)
3920 enum machine_mode element_mode = VOIDmode;
3921 enum machine_mode mode;
3922 enum tree_code code = TREE_CODE (type);
3923 int know_element_mode = 0;
3926 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3931 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3932 case BOOLEAN_TYPE: case POINTER_TYPE:
3933 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3934 case LANG_TYPE: case FUNCTION_TYPE:
3937 /* Fortran complex types are supposed to be HFAs, so we need to handle
3938 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3941 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3942 && TYPE_MODE (type) != TCmode)
3943 return GET_MODE_INNER (TYPE_MODE (type));
3948 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3949 mode if this is contained within an aggregate. */
3950 if (nested && TYPE_MODE (type) != TFmode)
3951 return TYPE_MODE (type);
3956 return hfa_element_mode (TREE_TYPE (type), 1);
3960 case QUAL_UNION_TYPE:
3961 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3963 if (TREE_CODE (t) != FIELD_DECL)
3966 mode = hfa_element_mode (TREE_TYPE (t), 1);
3967 if (know_element_mode)
3969 if (mode != element_mode)
3972 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3976 know_element_mode = 1;
3977 element_mode = mode;
3980 return element_mode;
3983 /* If we reach here, we probably have some front-end specific type
3984 that the backend doesn't know about. This can happen via the
3985 aggregate_value_p call in init_function_start. All we can do is
3986 ignore unknown tree types. */
3993 /* Return the number of words required to hold a quantity of TYPE and MODE
3994 when passed as an argument. */
3996 ia64_function_arg_words (tree type, enum machine_mode mode)
4000 if (mode == BLKmode)
4001 words = int_size_in_bytes (type);
4003 words = GET_MODE_SIZE (mode);
4005 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
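/* E.g. (illustration): a 20-byte BLKmode aggregate occupies
   (20 + 7) / 8 = 3 argument slots on this target.  */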
4008 /* Return the number of registers that should be skipped so the current
4009 argument (described by TYPE and WORDS) will be properly aligned.
4011 Integer and float arguments larger than 8 bytes start at the next
4012 even boundary. Aggregates larger than 8 bytes start at the next
4013 even boundary if the aggregate has 16 byte alignment. Note that
4014 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4015 but are still to be aligned in registers.
4017 ??? The ABI does not specify how to handle aggregates with
4018 alignment from 9 to 15 bytes, or greater than 16. We handle them
4019 all as if they had 16 byte alignment. Such aggregates can occur
4020 only if gcc extensions are used. */
4022 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4024 if ((cum->words & 1) == 0)
4028 && TREE_CODE (type) != INTEGER_TYPE
4029 && TREE_CODE (type) != REAL_TYPE)
4030 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4035 /* Return rtx for register where argument is passed, or zero if it is passed
4037 /* ??? 128-bit quad-precision floats are always passed in general
4041 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4042 int named, int incoming)
4044 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4045 int words = ia64_function_arg_words (type, mode);
4046 int offset = ia64_function_arg_offset (cum, type, words);
4047 enum machine_mode hfa_mode = VOIDmode;
4049 /* If all argument slots are used, then it must go on the stack. */
4050 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4053 /* Check for and handle homogeneous FP aggregates. */
4055 hfa_mode = hfa_element_mode (type, 0);
4057 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4058 and unprototyped hfas are passed specially. */
4059 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4063 int fp_regs = cum->fp_regs;
4064 int int_regs = cum->words + offset;
4065 int hfa_size = GET_MODE_SIZE (hfa_mode);
4069 /* If prototyped, pass it in FR regs then GR regs.
4070 If not prototyped, pass it in both FR and GR regs.
4072 If this is an SFmode aggregate, then it is possible to run out of
4073 FR regs while GR regs are still left. In that case, we pass the
4074 remaining part in the GR regs. */
4076 /* Fill the FP regs. We do this always. We stop if we reach the end
4077 of the argument, the last FP register, or the last argument slot. */
4079 byte_size = ((mode == BLKmode)
4080 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4081 args_byte_size = int_regs * UNITS_PER_WORD;
4083 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4084 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4086 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4087 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4091 args_byte_size += hfa_size;
4095 /* If no prototype, then the whole thing must go in GR regs. */
4096 if (! cum->prototype)
4098 /* If this is an SFmode aggregate, then we might have some left over
4099 that needs to go in GR regs. */
4100 else if (byte_size != offset)
4101 int_regs += offset / UNITS_PER_WORD;
4103 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4105 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4107 enum machine_mode gr_mode = DImode;
4108 unsigned int gr_size;
4110 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4111 then this goes in a GR reg left adjusted/little endian, right
4112 adjusted/big endian. */
4113 /* ??? Currently this is handled wrong, because 4-byte hunks are
4114 always right adjusted/little endian. */
4117 /* If we have an even 4 byte hunk because the aggregate is a
4118 multiple of 4 bytes in size, then this goes in a GR reg right
4119 adjusted/little endian. */
4120 else if (byte_size - offset == 4)
4123 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4124 gen_rtx_REG (gr_mode, (basereg
4128 gr_size = GET_MODE_SIZE (gr_mode);
4130 if (gr_size == UNITS_PER_WORD
4131 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4133 else if (gr_size > UNITS_PER_WORD)
4134 int_regs += gr_size / UNITS_PER_WORD;
4136 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4139 /* Integral and aggregates go in general registers. If we have run out of
4140 FR registers, then FP values must also go in general registers. This can
4141 happen when we have an SFmode HFA. */
4142 else if (mode == TFmode || mode == TCmode
4143 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4145 int byte_size = ((mode == BLKmode)
4146 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4147 if (BYTES_BIG_ENDIAN
4148 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4149 && byte_size < UNITS_PER_WORD
4152 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4153 gen_rtx_REG (DImode,
4154 (basereg + cum->words
4157 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4160 return gen_rtx_REG (mode, basereg + cum->words + offset);
4164 /* If there is a prototype, then FP values go in a FR register when
4165 named, and in a GR register when unnamed. */
4166 else if (cum->prototype)
4169 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4170 /* In big-endian mode, an anonymous SFmode value must be represented
4171 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4172 the value into the high half of the general register. */
4173 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4174 return gen_rtx_PARALLEL (mode,
4176 gen_rtx_EXPR_LIST (VOIDmode,
4177 gen_rtx_REG (DImode, basereg + cum->words + offset),
4180 return gen_rtx_REG (mode, basereg + cum->words + offset);
4182 /* If there is no prototype, then FP values go in both FR and GR
4186 /* See comment above. */
4187 enum machine_mode inner_mode =
4188 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4190 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4191 gen_rtx_REG (mode, (FR_ARG_FIRST
4194 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4195 gen_rtx_REG (inner_mode,
4196 (basereg + cum->words
4200 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4204 /* Return the number of bytes, at the beginning of the argument, that must be
4205 put in registers. 0 if the argument is entirely in registers or entirely
4209 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4210 tree type, bool named ATTRIBUTE_UNUSED)
4212 int words = ia64_function_arg_words (type, mode);
4213 int offset = ia64_function_arg_offset (cum, type, words);
4215 /* If all argument slots are used, then it must go on the stack. */
4216 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4219 /* It doesn't matter whether the argument goes in FR or GR regs. If
4220 it fits within the 8 argument slots, then it goes entirely in
4221 registers. If it extends past the last argument slot, then the rest
4222 goes on the stack. */
4224 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4227 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
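  /* Illustrative note (not from the original sources): with six of the
     eight argument slots already occupied, a 32-byte aggregate gets its
     first two words (16 bytes) in registers and this function reports
     those 16 bytes; the remaining 16 bytes go on the stack.  */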
4230 /* Update CUM to point after this argument. This is patterned after
4231 ia64_function_arg. */
4234 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4235 tree type, int named)
4237 int words = ia64_function_arg_words (type, mode);
4238 int offset = ia64_function_arg_offset (cum, type, words);
4239 enum machine_mode hfa_mode = VOIDmode;
4241 /* If all arg slots are already full, then there is nothing to do. */
4242 if (cum->words >= MAX_ARGUMENT_SLOTS)
4245 cum->words += words + offset;
4247 /* Check for and handle homogeneous FP aggregates. */
4249 hfa_mode = hfa_element_mode (type, 0);
4251 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4252 and unprototyped hfas are passed specially. */
4253 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4255 int fp_regs = cum->fp_regs;
4256 /* This is the original value of cum->words + offset. */
4257 int int_regs = cum->words - words;
4258 int hfa_size = GET_MODE_SIZE (hfa_mode);
4262 /* If prototyped, pass it in FR regs then GR regs.
4263 If not prototyped, pass it in both FR and GR regs.
4265 If this is an SFmode aggregate, then it is possible to run out of
4266 FR regs while GR regs are still left. In that case, we pass the
4267 remaining part in the GR regs. */
4269 /* Fill the FP regs. We do this always. We stop if we reach the end
4270 of the argument, the last FP register, or the last argument slot. */
4272 byte_size = ((mode == BLKmode)
4273 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4274 args_byte_size = int_regs * UNITS_PER_WORD;
4276 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4277 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4280 args_byte_size += hfa_size;
4284 cum->fp_regs = fp_regs;
4287 /* Integral and aggregates go in general registers. So do TFmode FP values.
4288 If we have run out of FR registers, then other FP values must also go in
4289 general registers. This can happen when we have an SFmode HFA. */
4290 else if (mode == TFmode || mode == TCmode
4291 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4292 cum->int_regs = cum->words;
4294 /* If there is a prototype, then FP values go in a FR register when
4295 named, and in a GR register when unnamed. */
4296 else if (cum->prototype)
4299 cum->int_regs = cum->words;
4301 /* ??? Complex types should not reach here. */
4302 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4304 /* If there is no prototype, then FP values go in both FR and GR
4308 /* ??? Complex types should not reach here. */
4309 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4310 cum->int_regs = cum->words;
4314 /* Arguments with alignment larger than 8 bytes start at the next even
4315 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4316 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4319 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4322 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4323 return PARM_BOUNDARY * 2;
4327 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4328 return PARM_BOUNDARY * 2;
4330 return PARM_BOUNDARY;
4333 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4334 return PARM_BOUNDARY * 2;
4336 return PARM_BOUNDARY;
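  /* Illustrative note (not from the original sources): PARM_BOUNDARY is
     64 bits here, so a 16-byte-aligned aggregate (or TFmode on ILP32
     HP-UX) reports a 128-bit boundary, while ordinary scalars stay at
     64 bits.  */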
4339 /* True if it is OK to do sibling call optimization for the specified
4340 call expression EXP. DECL will be the called function, or NULL if
4341 this is an indirect call. */
4343 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4345 /* We can't perform a sibcall if the current function has the syscall_linkage
4347 if (lookup_attribute ("syscall_linkage",
4348 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4351 /* We must always return with our current GP. This means we can
4352 only sibcall to functions defined in the current module unless
4353 TARGET_CONST_GP is set to true. */
4354 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4358 /* Implement va_arg. */
4361 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4364 /* Variable sized types are passed by reference. */
4365 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4367 tree ptrtype = build_pointer_type (type);
4368 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4369 return build_va_arg_indirect_ref (addr);
4372 /* Aggregate arguments with alignment larger than 8 bytes start at
4373 the next even boundary. Integer and floating point arguments
4374 do so if they are larger than 8 bytes, whether or not they are
4375 also aligned larger than 8 bytes. */
4376 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4377 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4379 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4380 size_int (2 * UNITS_PER_WORD - 1));
4381 t = fold_convert (sizetype, t);
4382 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4383 size_int (-2 * UNITS_PER_WORD));
4384 t = fold_convert (TREE_TYPE (valist), t);
4385 gimplify_assign (unshare_expr (valist), t, pre_p);
4388 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
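  /* Illustrative sketch (not from the original sources): for a 16-byte
     aligned aggregate, and assuming UNITS_PER_WORD is 8, the code above
     effectively emits
         valist = (valist + 15) & -16;
     before handing the rounded pointer to the standard va_arg
     expansion.  */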
4391 /* Return 1 if the function return value is returned in memory. Return 0 if it is
4395 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4397 enum machine_mode mode;
4398 enum machine_mode hfa_mode;
4399 HOST_WIDE_INT byte_size;
4401 mode = TYPE_MODE (valtype);
4402 byte_size = GET_MODE_SIZE (mode);
4403 if (mode == BLKmode)
4405 byte_size = int_size_in_bytes (valtype);
4410 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4412 hfa_mode = hfa_element_mode (valtype, 0);
4413 if (hfa_mode != VOIDmode)
4415 int hfa_size = GET_MODE_SIZE (hfa_mode);
4417 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4422 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
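  /* Illustrative note (not from the original sources): assuming
     MAX_INT_RETURN_SLOTS is 4 and words are 8 bytes, a non-HFA aggregate
     of up to 32 bytes is returned in registers; anything larger is
     returned in memory.  */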
4428 /* Return the rtx for the register that holds the function return value. */
4431 ia64_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
4433 enum machine_mode mode;
4434 enum machine_mode hfa_mode;
4436 mode = TYPE_MODE (valtype);
4437 hfa_mode = hfa_element_mode (valtype, 0);
4439 if (hfa_mode != VOIDmode)
4447 hfa_size = GET_MODE_SIZE (hfa_mode);
4448 byte_size = ((mode == BLKmode)
4449 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4451 for (i = 0; offset < byte_size; i++)
4453 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4454 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4458 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4460 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4461 return gen_rtx_REG (mode, FR_ARG_FIRST);
4464 bool need_parallel = false;
4466 /* In big-endian mode, we need to manage the layout of aggregates
4467 in the registers so that we get the bits properly aligned in
4468 the highpart of the registers. */
4469 if (BYTES_BIG_ENDIAN
4470 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4471 need_parallel = true;
4473 /* Something like struct S { long double x; char a[0] } is not an
4474 HFA structure, and therefore doesn't go in fp registers. But
4475 the middle-end will give it XFmode anyway, and XFmode values
4476 don't normally fit in integer registers. So we need to smuggle
4477 the value inside a parallel. */
4478 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4479 need_parallel = true;
4489 bytesize = int_size_in_bytes (valtype);
4490 /* An empty PARALLEL is invalid here, but the return value
4491 doesn't matter for empty structs. */
4493 return gen_rtx_REG (mode, GR_RET_FIRST);
4494 for (i = 0; offset < bytesize; i++)
4496 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4497 gen_rtx_REG (DImode,
4500 offset += UNITS_PER_WORD;
4502 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4505 return gen_rtx_REG (mode, GR_RET_FIRST);
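  /* Illustrative note (not from the original sources): an HFA such as
         struct quad { double a, b, c, d; };
     comes back in four consecutive FP argument registers starting at
     FR_ARG_FIRST, whereas a plain 24-byte struct is returned in the
     first three general return registers starting at GR_RET_FIRST (r8
     on IA-64).  */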
4509 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4510 We need to emit DTP-relative relocations. */
4513 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4515 gcc_assert (size == 4 || size == 8);
4517 fputs ("\tdata4.ua\t@dtprel(", file);
4519 fputs ("\tdata8.ua\t@dtprel(", file);
4520 output_addr_const (file, x);
4524 /* Print a memory address as an operand to reference that memory location. */
4526 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4527 also call this from ia64_print_operand for memory addresses. */
4530 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4531 rtx address ATTRIBUTE_UNUSED)
4535 /* Print an operand to an assembler instruction.
4536 C Swap and print a comparison operator.
4537 D Print an FP comparison operator.
4538 E Print 32 - constant, for SImode shifts as extract.
4539 e Print 64 - constant, for DImode rotates.
4540 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4541 a floating point register emitted normally.
4542 G A floating point constant.
4543 I Invert a predicate register by adding 1.
4544 J Select the proper predicate register for a condition.
4545 j Select the inverse predicate register for a condition.
4546 O Append .acq for volatile load.
4547 P Postincrement of a MEM.
4548 Q Append .rel for volatile store.
4549 R Print .s .d or nothing for a single, double or no truncation.
4550 S Shift amount for shladd instruction.
4551 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4552 for Intel assembler.
4553 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4554 for Intel assembler.
4555 X A pair of floating point registers.
4556 r Print register name, or constant 0 as r0. HP compatibility for
4558 v Print vector constant value as an 8-byte integer value. */
4561 ia64_print_operand (FILE * file, rtx x, int code)
4568 /* Handled below. */
4573 enum rtx_code c = swap_condition (GET_CODE (x));
4574 fputs (GET_RTX_NAME (c), file);
4579 switch (GET_CODE (x))
4603 str = GET_RTX_NAME (GET_CODE (x));
4610 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4618 if (x == CONST0_RTX (GET_MODE (x)))
4619 str = reg_names [FR_REG (0)];
4620 else if (x == CONST1_RTX (GET_MODE (x)))
4621 str = reg_names [FR_REG (1)];
4624 gcc_assert (GET_CODE (x) == REG);
4625 str = reg_names [REGNO (x)];
4634 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4635 real_to_target (val, &rv, GET_MODE (x));
4636 if (GET_MODE (x) == SFmode)
4637 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4638 else if (GET_MODE (x) == DFmode)
4639 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4641 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4644 output_operand_lossage ("invalid %%G mode");
4649 fputs (reg_names [REGNO (x) + 1], file);
4655 unsigned int regno = REGNO (XEXP (x, 0));
4656 if (GET_CODE (x) == EQ)
4660 fputs (reg_names [regno], file);
4665 if (MEM_VOLATILE_P (x))
4666 fputs(".acq", file);
4671 HOST_WIDE_INT value;
4673 switch (GET_CODE (XEXP (x, 0)))
4679 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4680 if (GET_CODE (x) == CONST_INT)
4684 gcc_assert (GET_CODE (x) == REG);
4685 fprintf (file, ", %s", reg_names[REGNO (x)]);
4691 value = GET_MODE_SIZE (GET_MODE (x));
4695 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4699 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4704 if (MEM_VOLATILE_P (x))
4705 fputs(".rel", file);
4709 if (x == CONST0_RTX (GET_MODE (x)))
4711 else if (x == CONST1_RTX (GET_MODE (x)))
4713 else if (x == CONST2_RTX (GET_MODE (x)))
4716 output_operand_lossage ("invalid %%R value");
4720 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4724 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4726 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4732 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4734 const char *prefix = "0x";
4735 if (INTVAL (x) & 0x80000000)
4737 fprintf (file, "0xffffffff");
4740 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4747 unsigned int regno = REGNO (x);
4748 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4753 /* If this operand is the constant zero, write it as register zero.
4754 Any register, zero, or CONST_INT value is OK here. */
4755 if (GET_CODE (x) == REG)
4756 fputs (reg_names[REGNO (x)], file);
4757 else if (x == CONST0_RTX (GET_MODE (x)))
4759 else if (GET_CODE (x) == CONST_INT)
4760 output_addr_const (file, x);
4762 output_operand_lossage ("invalid %%r value");
4766 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4767 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4774 /* For conditional branches, returns or calls, substitute
4775 sptk, dptk, dpnt, or spnt for %s. */
4776 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4779 int pred_val = INTVAL (XEXP (x, 0));
4781 /* Guess the top and bottom 2% statically predicted, matching the thresholds below. */
4782 if (pred_val < REG_BR_PROB_BASE / 50
4783 && br_prob_note_reliable_p (x))
4785 else if (pred_val < REG_BR_PROB_BASE / 2)
4787 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4788 || !br_prob_note_reliable_p (x))
4793 else if (GET_CODE (current_output_insn) == CALL_INSN)
4798 fputs (which, file);
4803 x = current_insn_predicate;
4806 unsigned int regno = REGNO (XEXP (x, 0));
4807 if (GET_CODE (x) == EQ)
4809 fprintf (file, "(%s) ", reg_names [regno]);
4814 output_operand_lossage ("ia64_print_operand: unknown code");
4818 switch (GET_CODE (x))
4820 /* This happens for the spill/restore instructions. */
4825 /* ... fall through ... */
4828 fputs (reg_names [REGNO (x)], file);
4833 rtx addr = XEXP (x, 0);
4834 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4835 addr = XEXP (addr, 0);
4836 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4841 output_addr_const (file, x);
4848 /* Compute a (partial) cost for rtx X. Return true if the complete
4849 cost has been computed, and false if subexpressions should be
4850 scanned. In either case, *TOTAL contains the cost result. */
4851 /* ??? This is incomplete. */
4854 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
4855 bool speed ATTRIBUTE_UNUSED)
4863 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
4866 if (satisfies_constraint_I (x))
4868 else if (satisfies_constraint_J (x))
4871 *total = COSTS_N_INSNS (1);
4874 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
4877 *total = COSTS_N_INSNS (1);
4882 *total = COSTS_N_INSNS (1);
4888 *total = COSTS_N_INSNS (3);
4892 /* For multiplies wider than HImode, we have to go to the FPU,
4893 which normally involves copies. Plus there's the latency
4894 of the multiply itself, and the latency of the instructions to
4895 transfer integer regs to FP regs. */
4896 /* ??? Check for FP mode. */
4897 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4898 *total = COSTS_N_INSNS (10);
4900 *total = COSTS_N_INSNS (2);
4908 *total = COSTS_N_INSNS (1);
4915 /* We make divide expensive, so that divide-by-constant will be
4916 optimized to a multiply. */
4917 *total = COSTS_N_INSNS (60);
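      /* Illustrative note (not from the original sources): IA-64 has no
	 integer divide instruction, so pricing division at 60 insns
	 nudges the middle-end into expanding divisions by constants,
	 e.g. x / 9, into multiply/shift sequences instead.  */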
4925 /* Calculate the cost of moving data from a register in class FROM to
4926 one in class TO, using MODE. */
4929 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4932 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4933 if (to == ADDL_REGS)
4935 if (from == ADDL_REGS)
4938 /* All costs are symmetric, so reduce cases by putting the
4939 lower number class as the destination. */
4942 enum reg_class tmp = to;
4943 to = from, from = tmp;
4946 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4947 so that we get secondary memory reloads. Between FR_REGS,
4948 we have to make this at least as expensive as MEMORY_MOVE_COST
4949 to avoid spectacularly poor register class preferencing. */
4950 if (mode == XFmode || mode == RFmode)
4952 if (to != GR_REGS || from != GR_REGS)
4953 return MEMORY_MOVE_COST (mode, to, 0);
4961 /* Moving between PR registers takes two insns. */
4962 if (from == PR_REGS)
4964 /* Moving between PR and anything but GR is impossible. */
4965 if (from != GR_REGS)
4966 return MEMORY_MOVE_COST (mode, to, 0);
4970 /* Moving between BR and anything but GR is impossible. */
4971 if (from != GR_REGS && from != GR_AND_BR_REGS)
4972 return MEMORY_MOVE_COST (mode, to, 0);
4977 /* Moving between AR and anything but GR is impossible. */
4978 if (from != GR_REGS)
4979 return MEMORY_MOVE_COST (mode, to, 0);
4985 case GR_AND_FR_REGS:
4986 case GR_AND_BR_REGS:
4997 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
4998 to use when copying X into that class. */
5001 ia64_preferred_reload_class (rtx x, enum reg_class rclass)
5007 /* Don't allow volatile mem reloads into floating point registers.
5008 This is defined to force reload to choose the r/m case instead
5009 of the f/f case when reloading (set (reg fX) (mem/v)). */
5010 if (MEM_P (x) && MEM_VOLATILE_P (x))
5013 /* Force all unrecognized constants into the constant pool. */
5031 /* This function returns the register class required for a secondary
5032 register when copying between one of the registers in RCLASS, and X,
5033 using MODE. A return value of NO_REGS means that no secondary register
5037 ia64_secondary_reload_class (enum reg_class rclass,
5038 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5042 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5043 regno = true_regnum (x);
5050 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5051 interaction. We end up with two pseudos with overlapping lifetimes
5052 both of which are equiv to the same constant, and both of which need
5053 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5054 changes depending on the path length, which means the qty_first_reg
5055 check in make_regs_eqv can give different answers at different times.
5056 At some point I'll probably need a reload_indi pattern to handle
5059 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5060 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5061 non-general registers for good measure. */
5062 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5065 /* This is needed if a pseudo used as a call_operand gets spilled to a
5067 if (GET_CODE (x) == MEM)
5073 /* Need to go through general registers to get to other class regs. */
5074 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5077 /* This can happen when a paradoxical subreg is an operand to the
5079 /* ??? This shouldn't be necessary after instruction scheduling is
5080 enabled, because paradoxical subregs are not accepted by
5081 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5082 stop the paradoxical subreg stupidity in the *_operand functions
5084 if (GET_CODE (x) == MEM
5085 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5086 || GET_MODE (x) == QImode))
5089 /* This can happen because of the ior/and/etc patterns that accept FP
5090 registers as operands. If the third operand is a constant, then it
5091 needs to be reloaded into a FP register. */
5092 if (GET_CODE (x) == CONST_INT)
5095 /* This can happen because of register elimination in a muldi3 insn.
5096 E.g. `26107 * (unsigned long)&u'. */
5097 if (GET_CODE (x) == PLUS)
5102 /* ??? This happens if we cse/gcse a BImode value across a call,
5103 and the function has a nonlocal goto. This is because global
5104 does not allocate call crossing pseudos to hard registers when
5105 crtl->has_nonlocal_goto is true. This is relatively
5106 common for C++ programs that use exceptions. To reproduce,
5107 return NO_REGS and compile libstdc++. */
5108 if (GET_CODE (x) == MEM)
5111 /* This can happen when we take a BImode subreg of a DImode value,
5112 and that DImode value winds up in some non-GR register. */
5113 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5125 /* Implement targetm.unspec_may_trap_p hook. */
5127 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5129 if (GET_CODE (x) == UNSPEC)
5131 switch (XINT (x, 1))
5137 case UNSPEC_CHKACLR:
5139 /* These unspecs are just wrappers. */
5140 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5144 return default_unspec_may_trap_p (x, flags);
5148 /* Parse the -mfixed-range= option string. */
5151 fix_range (const char *const_str)
5154 char *str, *dash, *comma;
5156 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5157 REG2 are either register names or register numbers. The effect
5158 of this option is to mark the registers in the range from REG1 to
5159 REG2 as ``fixed'' so they won't be used by the compiler. This is
5160 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5162 i = strlen (const_str);
5163 str = (char *) alloca (i + 1);
5164 memcpy (str, const_str, i + 1);
5168 dash = strchr (str, '-');
5171 warning (0, "value of -mfixed-range must have form REG1-REG2");
5176 comma = strchr (dash + 1, ',');
5180 first = decode_reg_name (str);
5183 warning (0, "unknown register name: %s", str);
5187 last = decode_reg_name (dash + 1);
5190 warning (0, "unknown register name: %s", dash + 1);
5198 warning (0, "%s-%s is an empty range", str, dash + 1);
5202 for (i = first; i <= last; ++i)
5203 fixed_regs[i] = call_used_regs[i] = 1;
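  /* Illustrative usage (not from the original sources): a command line
     such as
         gcc -mfixed-range=f32-f127 ...
     ends up here and marks f32 through f127 as fixed, so generated code
     never touches the upper floating-point register bank.  */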
5213 /* Implement TARGET_HANDLE_OPTION. */
5216 ia64_handle_option (size_t code, const char *arg, int value)
5218 static bool warned_itanium1_deprecated;
5222 case OPT_mfixed_range_:
5226 case OPT_mtls_size_:
5227 if (value != 14 && value != 22 && value != 64)
5228 error ("bad value %<%s%> for -mtls-size= switch", arg);
5235 const char *name; /* processor name or nickname. */
5236 enum processor_type processor;
5238 const processor_alias_table[] =
5240 {"itanium", PROCESSOR_ITANIUM},
5241 {"itanium1", PROCESSOR_ITANIUM},
5242 {"merced", PROCESSOR_ITANIUM},
5243 {"itanium2", PROCESSOR_ITANIUM2},
5244 {"mckinley", PROCESSOR_ITANIUM2},
5246 int const pta_size = ARRAY_SIZE (processor_alias_table);
5249 for (i = 0; i < pta_size; i++)
5250 if (!strcmp (arg, processor_alias_table[i].name))
5252 ia64_tune = processor_alias_table[i].processor;
5253 if (ia64_tune == PROCESSOR_ITANIUM
5254 && ! warned_itanium1_deprecated)
5257 "value %<%s%> for -mtune= switch is deprecated",
5259 inform (0, "GCC 4.4 is the last release with "
5260 "Itanium1 tuning support");
5261 warned_itanium1_deprecated = true;
5266 error ("bad value %<%s%> for -mtune= switch", arg);
5275 /* Implement OVERRIDE_OPTIONS. */
5278 ia64_override_options (void)
5280 if (TARGET_AUTO_PIC)
5281 target_flags |= MASK_CONST_GP;
5283 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5285 warning (0, "not yet implemented: latency-optimized inline square root");
5286 TARGET_INLINE_SQRT = INL_MAX_THR;
5289 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5290 flag_schedule_insns_after_reload = 0;
5293 && ! sel_sched_switch_set)
5295 flag_selective_scheduling2 = 1;
5296 flag_sel_sched_pipelining = 1;
5298 if (mflag_sched_control_spec == 2)
5300 /* Control speculation is on by default for the selective scheduler,
5301 but not for the Haifa scheduler. */
5302 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5304 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5306 /* FIXME: remove this when we implement breaking autoinsns as
5307 a transformation. */
5308 flag_auto_inc_dec = 0;
5311 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5313 init_machine_status = ia64_init_machine_status;
5315 if (align_functions <= 0)
5316 align_functions = 64;
5317 if (align_loops <= 0)
5321 /* Initialize the record of emitted frame related registers. */
5323 void ia64_init_expanders (void)
5325 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5328 static struct machine_function *
5329 ia64_init_machine_status (void)
5331 return GGC_CNEW (struct machine_function);
5334 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5335 static enum attr_type ia64_safe_type (rtx);
5337 static enum attr_itanium_class
5338 ia64_safe_itanium_class (rtx insn)
5340 if (recog_memoized (insn) >= 0)
5341 return get_attr_itanium_class (insn);
5343 return ITANIUM_CLASS_UNKNOWN;
5346 static enum attr_type
5347 ia64_safe_type (rtx insn)
5349 if (recog_memoized (insn) >= 0)
5350 return get_attr_type (insn);
5352 return TYPE_UNKNOWN;
5355 /* The following collection of routines emit instruction group stop bits as
5356 necessary to avoid dependencies. */
5358 /* Need to track some additional registers as far as serialization is
5359 concerned so we can properly handle br.call and br.ret. We could
5360 make these registers visible to gcc, but since these registers are
5361 never explicitly used in gcc generated code, it seems wasteful to
5362 do so (plus it would make the call and return patterns needlessly
5364 #define REG_RP (BR_REG (0))
5365 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5366 /* This is used for volatile asms which may require a stop bit immediately
5367 before and after them. */
5368 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5369 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5370 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5372 /* For each register, we keep track of how it has been written in the
5373 current instruction group.
5375 If a register is written unconditionally (no qualifying predicate),
5376 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5378 If a register is written if its qualifying predicate P is true, we
5379 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5380 may be written again by the complement of P (P^1) and when this happens,
5381 WRITE_COUNT gets set to 2.
5383 The result of this is that whenever an insn attempts to write a register
5384 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5386 If a predicate register is written by a floating-point insn, we set
5387 WRITTEN_BY_FP to true.
5389 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5390 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
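/* Illustrative example (not from the original sources): within a single
   instruction group the pair

	(p6) mov r4 = r5
	(p7) mov r4 = r6

   is accepted, because p6 and p7 are assumed to be complementary, so the
   second write simply raises r4's WRITE_COUNT from 1 to 2.  Any further
   write to r4 in the same group would then require a stop bit first.  */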
5392 #if GCC_VERSION >= 4000
5393 #define RWS_FIELD_TYPE __extension__ unsigned short
5395 #define RWS_FIELD_TYPE unsigned int
5397 struct reg_write_state
5399 RWS_FIELD_TYPE write_count : 2;
5400 RWS_FIELD_TYPE first_pred : 10;
5401 RWS_FIELD_TYPE written_by_fp : 1;
5402 RWS_FIELD_TYPE written_by_and : 1;
5403 RWS_FIELD_TYPE written_by_or : 1;
5406 /* Cumulative info for the current instruction group. */
5407 struct reg_write_state rws_sum[NUM_REGS];
5408 #ifdef ENABLE_CHECKING
5409 /* Bitmap recording whether each register has been written in the current insn. */
5410 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5411 / HOST_BITS_PER_WIDEST_FAST_INT];
5414 rws_insn_set (int regno)
5416 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5417 SET_HARD_REG_BIT (rws_insn, regno);
5421 rws_insn_test (int regno)
5423 return TEST_HARD_REG_BIT (rws_insn, regno);
5426 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5427 unsigned char rws_insn[2];
5430 rws_insn_set (int regno)
5432 if (regno == REG_AR_CFM)
5434 else if (regno == REG_VOLATILE)
5439 rws_insn_test (int regno)
5441 if (regno == REG_AR_CFM)
5443 if (regno == REG_VOLATILE)
5449 /* Indicates whether this is the first instruction after a stop bit,
5450 in which case we don't need another stop bit. Without this,
5451 ia64_variable_issue will die when scheduling an alloc. */
5452 static int first_instruction;
5454 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5455 RTL for one instruction. */
5458 unsigned int is_write : 1; /* Is register being written? */
5459 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5460 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5461 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5462 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5463 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5466 static void rws_update (int, struct reg_flags, int);
5467 static int rws_access_regno (int, struct reg_flags, int);
5468 static int rws_access_reg (rtx, struct reg_flags, int);
5469 static void update_set_flags (rtx, struct reg_flags *);
5470 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5471 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5472 static void init_insn_group_barriers (void);
5473 static int group_barrier_needed (rtx);
5474 static int safe_group_barrier_needed (rtx);
5475 static int in_safe_group_barrier;
5477 /* Update *RWS for REGNO, which is being written by the current instruction,
5478 with predicate PRED, and associated register flags in FLAGS. */
5481 rws_update (int regno, struct reg_flags flags, int pred)
5484 rws_sum[regno].write_count++;
5486 rws_sum[regno].write_count = 2;
5487 rws_sum[regno].written_by_fp |= flags.is_fp;
5488 /* ??? Not tracking and/or across differing predicates. */
5489 rws_sum[regno].written_by_and = flags.is_and;
5490 rws_sum[regno].written_by_or = flags.is_or;
5491 rws_sum[regno].first_pred = pred;
5494 /* Handle an access to register REGNO of type FLAGS using predicate register
5495 PRED. Update rws_sum array. Return 1 if this access creates
5496 a dependency with an earlier instruction in the same group. */
5499 rws_access_regno (int regno, struct reg_flags flags, int pred)
5501 int need_barrier = 0;
5503 gcc_assert (regno < NUM_REGS);
5505 if (! PR_REGNO_P (regno))
5506 flags.is_and = flags.is_or = 0;
5512 rws_insn_set (regno);
5513 write_count = rws_sum[regno].write_count;
5515 switch (write_count)
5518 /* The register has not been written yet. */
5519 if (!in_safe_group_barrier)
5520 rws_update (regno, flags, pred);
5524 /* The register has been written via a predicate. If this is
5525 not a complementary predicate, then we need a barrier. */
5526 /* ??? This assumes that P and P+1 are always complementary
5527 predicates for P even. */
5528 if (flags.is_and && rws_sum[regno].written_by_and)
5530 else if (flags.is_or && rws_sum[regno].written_by_or)
5532 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5534 if (!in_safe_group_barrier)
5535 rws_update (regno, flags, pred);
5539 /* The register has been unconditionally written already. We
5541 if (flags.is_and && rws_sum[regno].written_by_and)
5543 else if (flags.is_or && rws_sum[regno].written_by_or)
5547 if (!in_safe_group_barrier)
5549 rws_sum[regno].written_by_and = flags.is_and;
5550 rws_sum[regno].written_by_or = flags.is_or;
5560 if (flags.is_branch)
5562 /* Branches have several RAW exceptions that allow us to avoid
5565 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5566 /* RAW dependencies on branch regs are permissible as long
5567 as the writer is a non-branch instruction. Since we
5568 never generate code that uses a branch register written
5569 by a branch instruction, handling this case is
5573 if (REGNO_REG_CLASS (regno) == PR_REGS
5574 && ! rws_sum[regno].written_by_fp)
5575 /* The predicates of a branch are available within the
5576 same insn group as long as the predicate was written by
5577 something other than a floating-point instruction. */
5581 if (flags.is_and && rws_sum[regno].written_by_and)
5583 if (flags.is_or && rws_sum[regno].written_by_or)
5586 switch (rws_sum[regno].write_count)
5589 /* The register has not been written yet. */
5593 /* The register has been written via a predicate. If this is
5594 not a complementary predicate, then we need a barrier. */
5595 /* ??? This assumes that P and P+1 are always complementary
5596 predicates for P even. */
5597 if ((rws_sum[regno].first_pred ^ 1) != pred)
5602 /* The register has been unconditionally written already. We
5612 return need_barrier;
5616 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5618 int regno = REGNO (reg);
5619 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5622 return rws_access_regno (regno, flags, pred);
5625 int need_barrier = 0;
5627 need_barrier |= rws_access_regno (regno + n, flags, pred);
5628 return need_barrier;
5632 /* Examine X, which is a SET rtx, and update the register flags
5633 stored in *PFLAGS accordingly. */
5636 update_set_flags (rtx x, struct reg_flags *pflags)
5638 rtx src = SET_SRC (x);
5640 switch (GET_CODE (src))
5646 /* There are four cases here:
5647 (1) The destination is (pc), in which case this is a branch,
5648 nothing here applies.
5649 (2) The destination is ar.lc, in which case this is a
5650 doloop_end_internal,
5651 (3) The destination is an fp register, in which case this is
5652 an fselect instruction.
5653 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5654 this is a check load.
5655 In all cases, nothing we do in this function applies. */
5659 if (COMPARISON_P (src)
5660 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5661 /* Set pflags->is_fp to 1 so that we know we're dealing
5662 with a floating point comparison when processing the
5663 destination of the SET. */
5666 /* Discover if this is a parallel comparison. We only handle
5667 and.orcm and or.andcm at present, since we must retain a
5668 strict inverse on the predicate pair. */
5669 else if (GET_CODE (src) == AND)
5671 else if (GET_CODE (src) == IOR)
5678 /* Subroutine of rtx_needs_barrier; this function determines whether the
5679 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5680 are as in rtx_needs_barrier. */
5684 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5686 int need_barrier = 0;
5688 rtx src = SET_SRC (x);
5690 if (GET_CODE (src) == CALL)
5691 /* We don't need to worry about the result registers that
5692 get written by a subroutine call. */
5693 return rtx_needs_barrier (src, flags, pred);
5694 else if (SET_DEST (x) == pc_rtx)
5696 /* X is a conditional branch. */
5697 /* ??? This seems redundant, as the caller sets this bit for
5699 if (!ia64_spec_check_src_p (src))
5700 flags.is_branch = 1;
5701 return rtx_needs_barrier (src, flags, pred);
5704 if (ia64_spec_check_src_p (src))
5705 /* Avoid checking one register twice (in condition
5706 and in 'then' section) for ldc pattern. */
5708 gcc_assert (REG_P (XEXP (src, 2)));
5709 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5711 /* We process MEM below. */
5712 src = XEXP (src, 1);
5715 need_barrier |= rtx_needs_barrier (src, flags, pred);
5718 if (GET_CODE (dst) == ZERO_EXTRACT)
5720 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5721 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5723 return need_barrier;
5726 /* Handle an access to rtx X of type FLAGS using predicate register
5727 PRED. Return 1 if this access creates a dependency with an earlier
5728 instruction in the same group. */
5731 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5734 int is_complemented = 0;
5735 int need_barrier = 0;
5736 const char *format_ptr;
5737 struct reg_flags new_flags;
5745 switch (GET_CODE (x))
5748 update_set_flags (x, &new_flags);
5749 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5750 if (GET_CODE (SET_SRC (x)) != CALL)
5752 new_flags.is_write = 1;
5753 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5758 new_flags.is_write = 0;
5759 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5761 /* Avoid multiple register writes, in case this is a pattern with
5762 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5763 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5765 new_flags.is_write = 1;
5766 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5767 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5768 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5773 /* X is a predicated instruction. */
5775 cond = COND_EXEC_TEST (x);
5777 need_barrier = rtx_needs_barrier (cond, flags, 0);
5779 if (GET_CODE (cond) == EQ)
5780 is_complemented = 1;
5781 cond = XEXP (cond, 0);
5782 gcc_assert (GET_CODE (cond) == REG
5783 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5784 pred = REGNO (cond);
5785 if (is_complemented)
5788 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5789 return need_barrier;
5793 /* Clobber & use are for earlier compiler-phases only. */
5798 /* We always emit stop bits for traditional asms. We emit stop bits
5799 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5800 if (GET_CODE (x) != ASM_OPERANDS
5801 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5803 /* Avoid writing the register multiple times if we have multiple
5804 asm outputs. This avoids a failure in rws_access_reg. */
5805 if (! rws_insn_test (REG_VOLATILE))
5807 new_flags.is_write = 1;
5808 rws_access_regno (REG_VOLATILE, new_flags, pred);
5813 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5814 We cannot just fall through here since then we would be confused
5815 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5816 a traditional asm, unlike its normal usage. */
5818 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5819 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5824 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5826 rtx pat = XVECEXP (x, 0, i);
5827 switch (GET_CODE (pat))
5830 update_set_flags (pat, &new_flags);
5831 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5837 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5848 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5850 rtx pat = XVECEXP (x, 0, i);
5851 if (GET_CODE (pat) == SET)
5853 if (GET_CODE (SET_SRC (pat)) != CALL)
5855 new_flags.is_write = 1;
5856 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5860 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5861 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5866 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5869 if (REGNO (x) == AR_UNAT_REGNUM)
5871 for (i = 0; i < 64; ++i)
5872 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5875 need_barrier = rws_access_reg (x, flags, pred);
5879 /* Find the regs used in memory address computation. */
5880 new_flags.is_write = 0;
5881 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5884 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5885 case SYMBOL_REF: case LABEL_REF: case CONST:
5888 /* Operators with side-effects. */
5889 case POST_INC: case POST_DEC:
5890 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5892 new_flags.is_write = 0;
5893 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5894 new_flags.is_write = 1;
5895 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5899 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5901 new_flags.is_write = 0;
5902 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5903 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5904 new_flags.is_write = 1;
5905 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5908 /* Handle common unary and binary ops for efficiency. */
5909 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5910 case MOD: case UDIV: case UMOD: case AND: case IOR:
5911 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5912 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5913 case NE: case EQ: case GE: case GT: case LE:
5914 case LT: case GEU: case GTU: case LEU: case LTU:
5915 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5916 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5919 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5920 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5921 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5922 case SQRT: case FFS: case POPCOUNT:
5923 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5927 /* VEC_SELECT's second argument is a PARALLEL with integers that
5928 describe the elements selected. On ia64, those integers are
5929 always constants. Avoid walking the PARALLEL so that we don't
5930 get confused with "normal" parallels and then die. */
5931 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5935 switch (XINT (x, 1))
5937 case UNSPEC_LTOFF_DTPMOD:
5938 case UNSPEC_LTOFF_DTPREL:
5940 case UNSPEC_LTOFF_TPREL:
5942 case UNSPEC_PRED_REL_MUTEX:
5943 case UNSPEC_PIC_CALL:
5945 case UNSPEC_FETCHADD_ACQ:
5946 case UNSPEC_BSP_VALUE:
5947 case UNSPEC_FLUSHRS:
5948 case UNSPEC_BUNDLE_SELECTOR:
5951 case UNSPEC_GR_SPILL:
5952 case UNSPEC_GR_RESTORE:
5954 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5955 HOST_WIDE_INT bit = (offset >> 3) & 63;
5957 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5958 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5959 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5964 case UNSPEC_FR_SPILL:
5965 case UNSPEC_FR_RESTORE:
5966 case UNSPEC_GETF_EXP:
5967 case UNSPEC_SETF_EXP:
5969 case UNSPEC_FR_SQRT_RECIP_APPROX:
5970 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
5975 case UNSPEC_CHKACLR:
5977 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5980 case UNSPEC_FR_RECIP_APPROX:
5982 case UNSPEC_COPYSIGN:
5983 case UNSPEC_FR_RECIP_APPROX_RES:
5984 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5985 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5988 case UNSPEC_CMPXCHG_ACQ:
5989 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5990 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5998 case UNSPEC_VOLATILE:
5999 switch (XINT (x, 1))
6002 /* Alloc must always be the first instruction of a group.
6003 We force this by always returning true. */
6004 /* ??? We might get better scheduling if we explicitly check for
6005 input/local/output register dependencies, and modify the
6006 scheduler so that alloc is always reordered to the start of
6007 the current group. We could then eliminate all of the
6008 first_instruction code. */
6009 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6011 new_flags.is_write = 1;
6012 rws_access_regno (REG_AR_CFM, new_flags, pred);
6015 case UNSPECV_SET_BSP:
6019 case UNSPECV_BLOCKAGE:
6020 case UNSPECV_INSN_GROUP_BARRIER:
6022 case UNSPECV_PSAC_ALL:
6023 case UNSPECV_PSAC_NORMAL:
6032 new_flags.is_write = 0;
6033 need_barrier = rws_access_regno (REG_RP, flags, pred);
6034 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6036 new_flags.is_write = 1;
6037 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6038 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6042 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6043 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6044 switch (format_ptr[i])
6046 case '0': /* unused field */
6047 case 'i': /* integer */
6048 case 'n': /* note */
6049 case 'w': /* wide integer */
6050 case 's': /* pointer to string */
6051 case 'S': /* optional pointer to string */
6055 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6060 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6061 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6070 return need_barrier;
6073 /* Clear out the state for group_barrier_needed at the start of a
6074 sequence of insns. */
6077 init_insn_group_barriers (void)
6079 memset (rws_sum, 0, sizeof (rws_sum));
6080 first_instruction = 1;
6083 /* Given the current state, determine whether a group barrier (a stop bit) is
6084 necessary before INSN. Return nonzero if so. This modifies the state to
6085 include the effects of INSN as a side-effect. */
6088 group_barrier_needed (rtx insn)
6091 int need_barrier = 0;
6092 struct reg_flags flags;
6094 memset (&flags, 0, sizeof (flags));
6095 switch (GET_CODE (insn))
6101 /* A barrier doesn't imply an instruction group boundary. */
6105 memset (rws_insn, 0, sizeof (rws_insn));
6109 flags.is_branch = 1;
6110 flags.is_sibcall = SIBLING_CALL_P (insn);
6111 memset (rws_insn, 0, sizeof (rws_insn));
6113 /* Don't bundle a call following another call. */
6114 if ((pat = prev_active_insn (insn))
6115 && GET_CODE (pat) == CALL_INSN)
6121 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6125 if (!ia64_spec_check_p (insn))
6126 flags.is_branch = 1;
6128 /* Don't bundle a jump following a call. */
6129 if ((pat = prev_active_insn (insn))
6130 && GET_CODE (pat) == CALL_INSN)
6138 if (GET_CODE (PATTERN (insn)) == USE
6139 || GET_CODE (PATTERN (insn)) == CLOBBER)
6140 /* Don't care about USE and CLOBBER "insns"---those are used to
6141 indicate to the optimizer that it shouldn't get rid of
6142 certain operations. */
6145 pat = PATTERN (insn);
6147 /* Ug. Hack hacks hacked elsewhere. */
6148 switch (recog_memoized (insn))
6150 /* We play dependency tricks with the epilogue in order
6151 to get proper schedules. Undo this for dv analysis. */
6152 case CODE_FOR_epilogue_deallocate_stack:
6153 case CODE_FOR_prologue_allocate_stack:
6154 pat = XVECEXP (pat, 0, 0);
6157 /* The pattern we use for br.cloop confuses the code above.
6158 The second element of the vector is representative. */
6159 case CODE_FOR_doloop_end_internal:
6160 pat = XVECEXP (pat, 0, 1);
6163 /* Doesn't generate code. */
6164 case CODE_FOR_pred_rel_mutex:
6165 case CODE_FOR_prologue_use:
6172 memset (rws_insn, 0, sizeof (rws_insn));
6173 need_barrier = rtx_needs_barrier (pat, flags, 0);
6175 /* Check to see if the previous instruction was a volatile
6178 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6186 if (first_instruction && INSN_P (insn)
6187 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6188 && GET_CODE (PATTERN (insn)) != USE
6189 && GET_CODE (PATTERN (insn)) != CLOBBER)
6192 first_instruction = 0;
6195 return need_barrier;
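/* Illustrative example (not from the original sources): for a sequence
   like

	ld8 r14 = [r32]
	add r15 = r14, 1

   the second insn reads r14 in the same group that wrote it, so
   group_barrier_needed reports that a stop bit (";;") must be emitted
   between the two.  */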
6198 /* Like group_barrier_needed, but do not clobber the current state. */
6201 safe_group_barrier_needed (rtx insn)
6203 int saved_first_instruction;
6206 saved_first_instruction = first_instruction;
6207 in_safe_group_barrier = 1;
6209 t = group_barrier_needed (insn);
6211 first_instruction = saved_first_instruction;
6212 in_safe_group_barrier = 0;
6217 /* Scan the current function and insert stop bits as necessary to
6218 eliminate dependencies. This function assumes that a final
6219 instruction scheduling pass has been run which has already
6220 inserted most of the necessary stop bits. This function only
6221 inserts new ones at basic block boundaries, since these are
6222 invisible to the scheduler. */
6225 emit_insn_group_barriers (FILE *dump)
6229 int insns_since_last_label = 0;
6231 init_insn_group_barriers ();
6233 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6235 if (GET_CODE (insn) == CODE_LABEL)
6237 if (insns_since_last_label)
6239 insns_since_last_label = 0;
6241 else if (GET_CODE (insn) == NOTE
6242 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6244 if (insns_since_last_label)
6246 insns_since_last_label = 0;
6248 else if (GET_CODE (insn) == INSN
6249 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6250 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6252 init_insn_group_barriers ();
6255 else if (INSN_P (insn))
6257 insns_since_last_label = 1;
6259 if (group_barrier_needed (insn))
6264 fprintf (dump, "Emitting stop before label %d\n",
6265 INSN_UID (last_label));
6266 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6269 init_insn_group_barriers ();
6277 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6278 This function has to emit all necessary group barriers. */
6281 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6285 init_insn_group_barriers ();
6287 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6289 if (GET_CODE (insn) == BARRIER)
6291 rtx last = prev_active_insn (insn);
6295 if (GET_CODE (last) == JUMP_INSN
6296 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6297 last = prev_active_insn (last);
6298 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6299 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6301 init_insn_group_barriers ();
6303 else if (INSN_P (insn))
6305 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6306 init_insn_group_barriers ();
6307 else if (group_barrier_needed (insn))
6309 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6310 init_insn_group_barriers ();
6311 group_barrier_needed (insn);
6319 /* Instruction scheduling support. */
6321 #define NR_BUNDLES 10
6323 /* A list of names of all available bundles. */
6325 static const char *bundle_name [NR_BUNDLES] =
6331 #if NR_BUNDLES == 10
6341 /* Nonzero if we should insert stop bits into the schedule. */
6343 int ia64_final_schedule = 0;
6345 /* Codes of the corresponding queried units: */
6347 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6348 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6350 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6351 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6353 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6355 /* The following variable value is an insn group barrier. */
6357 static rtx dfa_stop_insn;
6359 /* The following variable value is the last issued insn. */
6361 static rtx last_scheduled_insn;
6363 /* The following variable value is a pointer to a DFA state used as
6364 a temporary variable. */
6366 static state_t temp_dfa_state = NULL;
6368 /* The following variable value is the DFA state after issuing the last
6371 static state_t prev_cycle_state = NULL;
6373 /* The following array element values are TRUE if the corresponding
6374 insn requires stop bits to be added before it. */
6376 static char *stops_p = NULL;
6378 /* The following variable is used to set up the above-mentioned array. */
6380 static int stop_before_p = 0;
6382 /* The following variable value is the length of the arrays `clocks' and
6385 static int clocks_length;
6387 /* The following array element values are cycles on which the
6388 corresponding insn will be issued. The array is used only for
6393 /* The following array element values are the numbers of cycles that should be
6394 added to improve insn scheduling for MM_insns for Itanium1. */
6396 static int *add_cycles;
6398 /* The following variable value is number of data speculations in progress. */
6399 static int pending_data_specs = 0;
6401 /* Number of memory references on the current and the three following processor cycles. */
6402 static char mem_ops_in_group[4];
6405 /* Number of the current processor cycle (from the scheduler's point of view). */
6405 static int current_cycle;
6407 static rtx ia64_single_set (rtx);
6408 static void ia64_emit_insn_before (rtx, rtx);
6410 /* Map a bundle number to its pseudo-op. */
6413 get_bundle_name (int b)
6415 return bundle_name[b];
6419 /* Return the maximum number of instructions a cpu can issue. */
6422 ia64_issue_rate (void)
6427 /* Helper function - like single_set, but look inside COND_EXEC. */
6430 ia64_single_set (rtx insn)
6432 rtx x = PATTERN (insn), ret;
6433 if (GET_CODE (x) == COND_EXEC)
6434 x = COND_EXEC_CODE (x);
6435 if (GET_CODE (x) == SET)
6438 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6439 Although they are not classical single set, the second set is there just
6440 to protect it from moving past FP-relative stack accesses. */
6441 switch (recog_memoized (insn))
6443 case CODE_FOR_prologue_allocate_stack:
6444 case CODE_FOR_epilogue_deallocate_stack:
6445 ret = XVECEXP (x, 0, 0);
6449 ret = single_set_2 (insn, x);
6456 /* Adjust the cost of a scheduling dependency.
6457 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6458 COST is the current cost, DW is dependency weakness. */
6460 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6462 enum reg_note dep_type = (enum reg_note) dep_type1;
6463 enum attr_itanium_class dep_class;
6464 enum attr_itanium_class insn_class;
6466 insn_class = ia64_safe_itanium_class (insn);
6467 dep_class = ia64_safe_itanium_class (dep_insn);
6469 /* Treat true memory dependencies separately. Ignore apparent true
6470 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6471 if (dep_type == REG_DEP_TRUE
6472 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6473 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6476 if (dw == MIN_DEP_WEAK)
6477 /* Store and load are likely to alias, use higher cost to avoid stall. */
6478 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6479 else if (dw > MIN_DEP_WEAK)
6481 /* Store and load are less likely to alias. */
6482 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6483 /* Assume there will be no cache conflict for floating-point data.
6484 For integer data, L1 conflict penalty is huge (17 cycles), so we
6485 never assume it will not cause a conflict. */
6491 if (dep_type != REG_DEP_OUTPUT)
6494 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6495 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6501 /* Like emit_insn_before, but skip cycle_display notes.
6502 ??? When cycle display notes are implemented, update this. */
6505 ia64_emit_insn_before (rtx insn, rtx before)
6507 emit_insn_before (insn, before);
6510 /* The following function marks insns that produce addresses for load
6511 and store insns. Such insns will be placed into M slots because this
6512 decreases latency for Itanium1 (see function
6513 `ia64_produce_address_p' and the DFA descriptions). */
6516 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6518 rtx insn, next, next_tail;
6520 /* Before reload, which_alternative is not set, which means that
6521 ia64_safe_itanium_class will produce wrong results for (at least)
6522 move instructions. */
6523 if (!reload_completed)
6526 next_tail = NEXT_INSN (tail);
6527 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6530 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6532 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6534 sd_iterator_def sd_it;
6536 bool has_mem_op_consumer_p = false;
6538 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6540 enum attr_itanium_class c;
6542 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6545 next = DEP_CON (dep);
6546 c = ia64_safe_itanium_class (next);
6547 if ((c == ITANIUM_CLASS_ST
6548 || c == ITANIUM_CLASS_STF)
6549 && ia64_st_address_bypass_p (insn, next))
6551 has_mem_op_consumer_p = true;
6554 else if ((c == ITANIUM_CLASS_LD
6555 || c == ITANIUM_CLASS_FLD
6556 || c == ITANIUM_CLASS_FLDP)
6557 && ia64_ld_address_bypass_p (insn, next))
6559 has_mem_op_consumer_p = true;
6564 insn->call = has_mem_op_consumer_p;
6568 /* We're beginning a new block. Initialize data structures as necessary. */
6571 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6572 int sched_verbose ATTRIBUTE_UNUSED,
6573 int max_ready ATTRIBUTE_UNUSED)
6575 #ifdef ENABLE_CHECKING
6578 if (!sel_sched_p () && reload_completed)
6579 for (insn = NEXT_INSN (current_sched_info->prev_head);
6580 insn != current_sched_info->next_tail;
6581 insn = NEXT_INSN (insn))
6582 gcc_assert (!SCHED_GROUP_P (insn));
6584 last_scheduled_insn = NULL_RTX;
6585 init_insn_group_barriers ();
6588 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
6591 /* We're beginning a scheduling pass. Check assertion. */
6594 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6595 int sched_verbose ATTRIBUTE_UNUSED,
6596 int max_ready ATTRIBUTE_UNUSED)
6598 gcc_assert (pending_data_specs == 0);
6601 /* Scheduling pass is now finished. Free/reset static variable. */
6603 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6604 int sched_verbose ATTRIBUTE_UNUSED)
6606 gcc_assert (pending_data_specs == 0);
6609 /* Return TRUE if INSN is a load (either normal or speculative, but not a
6610 speculation check), FALSE otherwise. */
6612 is_load_p (rtx insn)
6614 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6617 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6618 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6621 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global array
6622 (taking into account the 3-cycle cache reference postponing for stores: Intel
6623 Itanium 2 Reference Manual for Software Development and Optimization,
6626 record_memory_reference (rtx insn)
6628 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6630 switch (insn_class) {
6631 case ITANIUM_CLASS_FLD:
6632 case ITANIUM_CLASS_LD:
6633 mem_ops_in_group[current_cycle % 4]++;
6634 break;
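/* Stores are accounted three cycles after their issue cycle to model the
   cache reference postponing described in the comment above. */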
6635 case ITANIUM_CLASS_STF:
6636 case ITANIUM_CLASS_ST:
6637 mem_ops_in_group[(current_cycle + 3) % 4]++;
6643 /* We are about to begin issuing insns for this clock cycle.
6644 Override the default sort algorithm to better slot instructions. */
6647 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6648 int *pn_ready, int clock_var,
6652 int n_ready = *pn_ready;
6653 rtx *e_ready = ready + n_ready;
6657 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6659 if (reorder_type == 0)
6661 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6663 for (insnp = ready; insnp < e_ready; insnp++)
6664 if (insnp < e_ready)
6667 enum attr_type t = ia64_safe_type (insn);
6668 if (t == TYPE_UNKNOWN)
6670 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6671 || asm_noperands (PATTERN (insn)) >= 0)
6673 rtx lowest = ready[n_asms];
6674 ready[n_asms] = insn;
6680 rtx highest = ready[n_ready - 1];
6681 ready[n_ready - 1] = insn;
6688 if (n_asms < n_ready)
6690 /* Some normal insns to process. Skip the asms. */
6694 else if (n_ready > 0)
6698 if (ia64_final_schedule)
6701 int nr_need_stop = 0;
6703 for (insnp = ready; insnp < e_ready; insnp++)
6704 if (safe_group_barrier_needed (*insnp))
6707 if (reorder_type == 1 && n_ready == nr_need_stop)
6709 if (reorder_type == 0)
6712 /* Move down everything that needs a stop bit, preserving relative order. */
6714 while (insnp-- > ready + deleted)
6715 while (insnp >= ready + deleted)
6718 if (! safe_group_barrier_needed (insn))
6720 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6728 current_cycle = clock_var;
6729 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6734 /* Move down loads/stores, preserving relative order. */
6735 while (insnp-- > ready + moved)
6736 while (insnp >= ready + moved)
6739 if (! is_load_p (insn))
6741 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6752 /* We are about to begin issuing insns for this clock cycle. Override
6753 the default sort algorithm to better slot instructions. */
6756 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6759 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6760 pn_ready, clock_var, 0);
6763 /* Like ia64_sched_reorder, but called after issuing each insn.
6764 Override the default sort algorithm to better slot instructions. */
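/* On Itanium1 this also records, in the `clocks' array, the cycle on which
   the last scheduled insn was issued; ia64_dfa_new_cycle uses those values
   to compute add_cycles for MM-insns. */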
6767 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6768 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6769 int *pn_ready, int clock_var)
6771 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6772 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6773 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6777 /* We are about to issue INSN. Return the number of insns left on the
6778 ready queue that can be issued this cycle. */
6781 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6782 int sched_verbose ATTRIBUTE_UNUSED,
6783 rtx insn ATTRIBUTE_UNUSED,
6784 int can_issue_more ATTRIBUTE_UNUSED)
6786 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
6787 /* Modulo scheduling does not extend h_i_d when emitting
6788 new instructions. Don't use h_i_d, if we don't have to. */
6790 if (DONE_SPEC (insn) & BEGIN_DATA)
6791 pending_data_specs++;
6792 if (CHECK_SPEC (insn) & BEGIN_DATA)
6793 pending_data_specs--;
6796 last_scheduled_insn = insn;
6797 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6798 if (reload_completed)
6800 int needed = group_barrier_needed (insn);
6802 gcc_assert (!needed);
6803 if (GET_CODE (insn) == CALL_INSN)
6804 init_insn_group_barriers ();
6805 stops_p [INSN_UID (insn)] = stop_before_p;
6808 record_memory_reference (insn);
6813 /* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
6817 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6819 gcc_assert (insn && INSN_P (insn));
6820 return ((!reload_completed
6821 || !safe_group_barrier_needed (insn))
6822 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
6823 && (!mflag_sched_mem_insns_hard_limit
6824 || !is_load_p (insn)
6825 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
6828 /* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
6832 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
6834 gcc_assert (insn && INSN_P (insn));
6835 /* The size of the ALAT is 32. Since we perform conservative data speculation,
6836 we keep the ALAT at most half full. */
6837 return (pending_data_specs < 16
6838 || !(TODO_SPEC (insn) & BEGIN_DATA));
6841 /* The following variable value is a pseudo-insn used by the DFA insn
6842 scheduler to change the DFA state when the simulated clock is increased. */
6845 static rtx dfa_pre_cycle_insn;
6847 /* Returns 1 when a meaningful insn was scheduled between the last group
6848 barrier and LAST. */
6850 scheduled_good_insn (rtx last)
6852 if (last && recog_memoized (last) >= 0)
6856 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
6857 && !stops_p[INSN_UID (last)];
6858 last = PREV_INSN (last))
6859 /* We could hit a NOTE_INSN_DELETED here which is actually outside
6860 the ebb we're scheduling. */
6861 if (INSN_P (last) && recog_memoized (last) >= 0)
6867 /* We are about to begin issuing INSN. Return nonzero if we cannot
6868 issue it on the given cycle CLOCK, and return zero if we should not sort
6869 the ready queue on the next clock start. */
6872 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6873 int clock, int *sort_p)
6875 int setup_clocks_p = FALSE;
6877 gcc_assert (insn && INSN_P (insn));
6878 /* When a group barrier is needed for insn, last_scheduled_insn should already be set. */
6880 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
6881 || last_scheduled_insn);
6883 if ((reload_completed
6884 && (safe_group_barrier_needed (insn)
6885 || (mflag_sched_stop_bits_after_every_cycle
6886 && last_clock != clock
6887 && last_scheduled_insn
6888 && scheduled_good_insn (last_scheduled_insn))))
6889 || (last_scheduled_insn
6890 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6891 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6892 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6894 init_insn_group_barriers ();
6896 if (verbose && dump)
6897 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6898 last_clock == clock ? " + cycle advance" : "");
6901 current_cycle = clock;
6902 mem_ops_in_group[current_cycle % 4] = 0;
6904 if (last_clock == clock)
6906 state_transition (curr_state, dfa_stop_insn);
6907 if (TARGET_EARLY_STOP_BITS)
6908 *sort_p = (last_scheduled_insn == NULL_RTX
6909 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6914 else if (reload_completed)
6915 setup_clocks_p = TRUE;
6917 if (last_scheduled_insn)
6919 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6920 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6921 state_reset (curr_state);
6924 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6925 state_transition (curr_state, dfa_stop_insn);
6926 state_transition (curr_state, dfa_pre_cycle_insn);
6927 state_transition (curr_state, NULL);
6931 else if (reload_completed)
6932 setup_clocks_p = TRUE;
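/* For Itanium1, compute how many extra cycles this insn needs after an
   MMMUL/MMSHF producer and remember it in add_cycles; the bundling pass
   later inserts nop bundles to provide that distance. */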
6934 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6935 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6936 && asm_noperands (PATTERN (insn)) < 0)
6938 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6940 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6942 sd_iterator_def sd_it;
6946 FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
6947 if (DEP_TYPE (dep) == REG_DEP_TRUE)
6949 enum attr_itanium_class dep_class;
6950 rtx dep_insn = DEP_PRO (dep);
6952 dep_class = ia64_safe_itanium_class (dep_insn);
6953 if ((dep_class == ITANIUM_CLASS_MMMUL
6954 || dep_class == ITANIUM_CLASS_MMSHF)
6955 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6957 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6958 d = last_clock - clocks [INSN_UID (dep_insn)];
6961 add_cycles [INSN_UID (insn)] = 3 - d;
6968 /* Implement targetm.sched.h_i_d_extended hook.
6969 Extend internal data structures. */
6971 ia64_h_i_d_extended (void)
6973 if (stops_p != NULL)
6975 int new_clocks_length = get_max_uid () * 3 / 2;
6977 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6979 if (ia64_tune == PROCESSOR_ITANIUM)
6981 clocks = (int *) xrecalloc (clocks, new_clocks_length, clocks_length,
6983 add_cycles = (int *) xrecalloc (add_cycles, new_clocks_length,
6984 clocks_length, sizeof (int));
6987 clocks_length = new_clocks_length;
6992 /* This structure describes the data used by the backend to guide scheduling.
6993 When the current scheduling point is switched, this data should be saved
6994 and restored later, if the scheduler returns to this point. */
6995 struct _ia64_sched_context
6997 state_t prev_cycle_state;
6998 rtx last_scheduled_insn;
6999 struct reg_write_state rws_sum[NUM_REGS];
7000 struct reg_write_state rws_insn[NUM_REGS];
7001 int first_instruction;
7002 int pending_data_specs;
7003 int current_cycle;
7004 char mem_ops_in_group[4];
7006 typedef struct _ia64_sched_context *ia64_sched_context_t;
7008 /* Allocates a scheduling context. */
7010 ia64_alloc_sched_context (void)
7012 return xmalloc (sizeof (struct _ia64_sched_context));
7015 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7016 the global context otherwise. */
7018 ia64_init_sched_context (void *_sc, bool clean_p)
7020 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7022 sc->prev_cycle_state = xmalloc (dfa_state_size);
7025 state_reset (sc->prev_cycle_state);
7026 sc->last_scheduled_insn = NULL_RTX;
7027 memset (sc->rws_sum, 0, sizeof (rws_sum));
7028 memset (sc->rws_insn, 0, sizeof (rws_insn));
7029 sc->first_instruction = 1;
7030 sc->pending_data_specs = 0;
7031 sc->current_cycle = 0;
7032 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7036 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7037 sc->last_scheduled_insn = last_scheduled_insn;
7038 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7039 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7040 sc->first_instruction = first_instruction;
7041 sc->pending_data_specs = pending_data_specs;
7042 sc->current_cycle = current_cycle;
7043 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7047 /* Sets the global scheduling context to the one pointed to by _SC. */
7049 ia64_set_sched_context (void *_sc)
7051 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7053 gcc_assert (sc != NULL);
7055 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7056 last_scheduled_insn = sc->last_scheduled_insn;
7057 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7058 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7059 first_instruction = sc->first_instruction;
7060 pending_data_specs = sc->pending_data_specs;
7061 current_cycle = sc->current_cycle;
7062 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7065 /* Clears the data in the _SC scheduling context. */
7067 ia64_clear_sched_context (void *_sc)
7069 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7071 free (sc->prev_cycle_state);
7072 sc->prev_cycle_state = NULL;
7075 /* Frees the _SC scheduling context. */
7077 ia64_free_sched_context (void *_sc)
7079 gcc_assert (_sc != NULL);
7084 typedef rtx (* gen_func_t) (rtx, rtx);
7086 /* Return a function that will generate a load of mode MODE_NO
7087 with speculation types TS. */
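/* Each gen_ld_* table below is indexed by the mode number produced by
   ia64_mode_to_int; the final three entries are the zero-extending
   QI/HI/SI variants reached via SPEC_GEN_EXTEND_OFFSET. */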
7089 get_spec_load_gen_function (ds_t ts, int mode_no)
7091 static gen_func_t gen_ld_[] = {
7101 gen_zero_extendqidi2,
7102 gen_zero_extendhidi2,
7103 gen_zero_extendsidi2,
7106 static gen_func_t gen_ld_a[] = {
7116 gen_zero_extendqidi2_advanced,
7117 gen_zero_extendhidi2_advanced,
7118 gen_zero_extendsidi2_advanced,
7120 static gen_func_t gen_ld_s[] = {
7121 gen_movbi_speculative,
7122 gen_movqi_speculative,
7123 gen_movhi_speculative,
7124 gen_movsi_speculative,
7125 gen_movdi_speculative,
7126 gen_movsf_speculative,
7127 gen_movdf_speculative,
7128 gen_movxf_speculative,
7129 gen_movti_speculative,
7130 gen_zero_extendqidi2_speculative,
7131 gen_zero_extendhidi2_speculative,
7132 gen_zero_extendsidi2_speculative,
7134 static gen_func_t gen_ld_sa[] = {
7135 gen_movbi_speculative_advanced,
7136 gen_movqi_speculative_advanced,
7137 gen_movhi_speculative_advanced,
7138 gen_movsi_speculative_advanced,
7139 gen_movdi_speculative_advanced,
7140 gen_movsf_speculative_advanced,
7141 gen_movdf_speculative_advanced,
7142 gen_movxf_speculative_advanced,
7143 gen_movti_speculative_advanced,
7144 gen_zero_extendqidi2_speculative_advanced,
7145 gen_zero_extendhidi2_speculative_advanced,
7146 gen_zero_extendsidi2_speculative_advanced,
7148 static gen_func_t gen_ld_s_a[] = {
7149 gen_movbi_speculative_a,
7150 gen_movqi_speculative_a,
7151 gen_movhi_speculative_a,
7152 gen_movsi_speculative_a,
7153 gen_movdi_speculative_a,
7154 gen_movsf_speculative_a,
7155 gen_movdf_speculative_a,
7156 gen_movxf_speculative_a,
7157 gen_movti_speculative_a,
7158 gen_zero_extendqidi2_speculative_a,
7159 gen_zero_extendhidi2_speculative_a,
7160 gen_zero_extendsidi2_speculative_a,
7165 if (ts & BEGIN_DATA)
7167 if (ts & BEGIN_CONTROL)
7172 else if (ts & BEGIN_CONTROL)
7174 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7175 || ia64_needs_block_p (ts))
7178 gen_ld = gen_ld_s_a;
7185 return gen_ld[mode_no];
7188 /* Constants that help mapping 'enum machine_mode' to int. */
7191 SPEC_MODE_INVALID = -1,
7192 SPEC_MODE_FIRST = 0,
7193 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7194 SPEC_MODE_FOR_EXTEND_LAST = 3,
7200 /* Offset to reach ZERO_EXTEND patterns. */
7201 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7204 /* Return index of the MODE. */
7206 ia64_mode_to_int (enum machine_mode mode)
7210 case BImode: return 0; /* SPEC_MODE_FIRST */
7211 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7212 case HImode: return 2;
7213 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7214 case DImode: return 4;
7215 case SFmode: return 5;
7216 case DFmode: return 6;
7217 case XFmode: return 7;
7219 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7220 mentioned in itanium[12].md. Predicate fp_register_operand also
7221 needs to be defined. Bottom line: better disable for now. */
7222 return SPEC_MODE_INVALID;
7223 default: return SPEC_MODE_INVALID;
7227 /* Provide information about speculation capabilities. */
7229 ia64_set_sched_flags (spec_info_t spec_info)
7231 unsigned int *flags = &(current_sched_info->flags);
7233 if (*flags & SCHED_RGN
7234 || *flags & SCHED_EBB
7235 || *flags & SEL_SCHED)
7239 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7240 || (mflag_sched_ar_data_spec && reload_completed))
7245 && ((mflag_sched_br_in_data_spec && !reload_completed)
7246 || (mflag_sched_ar_in_data_spec && reload_completed)))
7250 if (mflag_sched_control_spec
7252 || reload_completed))
7254 mask |= BEGIN_CONTROL;
7256 if (!sel_sched_p () && mflag_sched_in_control_spec)
7257 mask |= BE_IN_CONTROL;
7260 spec_info->mask = mask;
7264 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7266 if (mask & BE_IN_SPEC)
7269 spec_info->flags = 0;
7271 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7272 spec_info->flags |= PREFER_NON_DATA_SPEC;
7274 if (mask & CONTROL_SPEC)
7276 if (mflag_sched_prefer_non_control_spec_insns)
7277 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7279 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7280 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7283 if (sched_verbose >= 1)
7284 spec_info->dump = sched_dump;
7286 spec_info->dump = 0;
7288 if (mflag_sched_count_spec_in_critical_path)
7289 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7293 spec_info->mask = 0;
7296 /* If INSN is an appropriate load, return its mode.
7297 Return -1 otherwise. */
7299 get_mode_no_for_insn (rtx insn)
7301 rtx reg, mem, mode_rtx;
7305 extract_insn_cached (insn);
7307 /* We use WHICH_ALTERNATIVE only after reload. This will
7308 guarantee that reload won't touch a speculative insn. */
7310 if (recog_data.n_operands != 2)
7313 reg = recog_data.operand[0];
7314 mem = recog_data.operand[1];
7316 /* We should use MEM's mode since REG's mode in the presence of
7317 ZERO_EXTEND will always be DImode. */
7318 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7319 /* Process non-speculative ld. */
7321 if (!reload_completed)
7323 /* Do not speculate into regs like ar.lc. */
7324 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7331 rtx mem_reg = XEXP (mem, 0);
7333 if (!REG_P (mem_reg))
7339 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7341 gcc_assert (REG_P (reg) && MEM_P (mem));
7347 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7348 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7349 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7350 /* Process speculative ld or ld.c. */
7352 gcc_assert (REG_P (reg) && MEM_P (mem));
7357 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7359 if (attr_class == ITANIUM_CLASS_CHK_A
7360 || attr_class == ITANIUM_CLASS_CHK_S_I
7361 || attr_class == ITANIUM_CLASS_CHK_S_F)
7368 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7370 if (mode_no == SPEC_MODE_INVALID)
7373 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7377 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7378 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7381 mode_no += SPEC_GEN_EXTEND_OFFSET;
7387 /* If X is an unspec part of a speculative load, return its code.
7388 Return -1 otherwise. */
7390 get_spec_unspec_code (const_rtx x)
7392 if (GET_CODE (x) != UNSPEC)
7414 /* Implement skip_rtx_p hook. */
7416 ia64_skip_rtx_p (const_rtx x)
7418 return get_spec_unspec_code (x) != -1;
7421 /* If INSN is a speculative load, return its UNSPEC code.
7422 Return -1 otherwise. */
7424 get_insn_spec_code (const_rtx insn)
7428 pat = PATTERN (insn);
7430 if (GET_CODE (pat) == COND_EXEC)
7431 pat = COND_EXEC_CODE (pat);
7433 if (GET_CODE (pat) != SET)
7436 reg = SET_DEST (pat);
7440 mem = SET_SRC (pat);
7441 if (GET_CODE (mem) == ZERO_EXTEND)
7442 mem = XEXP (mem, 0);
7444 return get_spec_unspec_code (mem);
7447 /* If INSN is a speculative load, return a ds with the speculation types.
7448 Otherwise [if INSN is a normal instruction] return 0. */
7450 ia64_get_insn_spec_ds (rtx insn)
7452 int code = get_insn_spec_code (insn);
7461 return BEGIN_CONTROL;
7464 return BEGIN_DATA | BEGIN_CONTROL;
7471 /* If INSN is a speculative load, return a ds with the speculation types that will be checked.
7473 Otherwise [if INSN is a normal instruction] return 0. */
7475 ia64_get_insn_checked_ds (rtx insn)
7477 int code = get_insn_spec_code (insn);
7482 return BEGIN_DATA | BEGIN_CONTROL;
7485 return BEGIN_CONTROL;
7489 return BEGIN_DATA | BEGIN_CONTROL;
7496 /* Return a speculative load pattern for INSN with speculation types TS
7497 and machine mode number MODE_NO. The selected generator already
7498 produces the ZERO_EXTEND form when MODE_NO designates one of the
7499 extending modes. */
7501 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7504 gen_func_t gen_load;
7506 gen_load = get_spec_load_gen_function (ts, mode_no);
7508 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7509 copy_rtx (recog_data.operand[1]));
7511 pat = PATTERN (insn);
7512 if (GET_CODE (pat) == COND_EXEC)
7513 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7520 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7521 ds_t ds ATTRIBUTE_UNUSED)
7526 /* Implement targetm.sched.speculate_insn hook.
7527 Check if the INSN can be TS speculative.
7528 If 'no' - return -1.
7529 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7530 If current pattern of the INSN already provides TS speculation,
7533 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7538 gcc_assert (!(ts & ~SPECULATIVE));
7540 if (ia64_spec_check_p (insn))
7543 if ((ts & BE_IN_SPEC)
7544 && !insn_can_be_in_speculative_p (insn, ts))
7547 mode_no = get_mode_no_for_insn (insn);
7549 if (mode_no != SPEC_MODE_INVALID)
7551 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7556 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7565 /* Return a function that will generate a check for speculation TS with mode MODE_NO.
7567 If a simple check is needed, pass true for SIMPLE_CHECK_P.
7568 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
7570 get_spec_check_gen_function (ds_t ts, int mode_no,
7571 bool simple_check_p, bool clearing_check_p)
7573 static gen_func_t gen_ld_c_clr[] = {
7583 gen_zero_extendqidi2_clr,
7584 gen_zero_extendhidi2_clr,
7585 gen_zero_extendsidi2_clr,
7587 static gen_func_t gen_ld_c_nc[] = {
7597 gen_zero_extendqidi2_nc,
7598 gen_zero_extendhidi2_nc,
7599 gen_zero_extendsidi2_nc,
7601 static gen_func_t gen_chk_a_clr[] = {
7602 gen_advanced_load_check_clr_bi,
7603 gen_advanced_load_check_clr_qi,
7604 gen_advanced_load_check_clr_hi,
7605 gen_advanced_load_check_clr_si,
7606 gen_advanced_load_check_clr_di,
7607 gen_advanced_load_check_clr_sf,
7608 gen_advanced_load_check_clr_df,
7609 gen_advanced_load_check_clr_xf,
7610 gen_advanced_load_check_clr_ti,
7611 gen_advanced_load_check_clr_di,
7612 gen_advanced_load_check_clr_di,
7613 gen_advanced_load_check_clr_di,
7615 static gen_func_t gen_chk_a_nc[] = {
7616 gen_advanced_load_check_nc_bi,
7617 gen_advanced_load_check_nc_qi,
7618 gen_advanced_load_check_nc_hi,
7619 gen_advanced_load_check_nc_si,
7620 gen_advanced_load_check_nc_di,
7621 gen_advanced_load_check_nc_sf,
7622 gen_advanced_load_check_nc_df,
7623 gen_advanced_load_check_nc_xf,
7624 gen_advanced_load_check_nc_ti,
7625 gen_advanced_load_check_nc_di,
7626 gen_advanced_load_check_nc_di,
7627 gen_advanced_load_check_nc_di,
7629 static gen_func_t gen_chk_s[] = {
7630 gen_speculation_check_bi,
7631 gen_speculation_check_qi,
7632 gen_speculation_check_hi,
7633 gen_speculation_check_si,
7634 gen_speculation_check_di,
7635 gen_speculation_check_sf,
7636 gen_speculation_check_df,
7637 gen_speculation_check_xf,
7638 gen_speculation_check_ti,
7639 gen_speculation_check_di,
7640 gen_speculation_check_di,
7641 gen_speculation_check_di,
7644 gen_func_t *gen_check;
7646 if (ts & BEGIN_DATA)
7648 /* We don't need recovery because even if this is ld.sa, the
7649 ALAT entry will be allocated only if the NAT bit is set to zero.
7650 So it is enough to use ld.c here. */
7654 gcc_assert (mflag_sched_spec_ldc);
7656 if (clearing_check_p)
7657 gen_check = gen_ld_c_clr;
7659 gen_check = gen_ld_c_nc;
7663 if (clearing_check_p)
7664 gen_check = gen_chk_a_clr;
7666 gen_check = gen_chk_a_nc;
7669 else if (ts & BEGIN_CONTROL)
7672 /* We might want to use ld.sa -> ld.c instead of ld.s -> chk.s. */
7675 gcc_assert (!ia64_needs_block_p (ts));
7677 if (clearing_check_p)
7678 gen_check = gen_ld_c_clr;
7680 gen_check = gen_ld_c_nc;
7684 gen_check = gen_chk_s;
7690 gcc_assert (mode_no >= 0);
7691 return gen_check[mode_no];
7694 /* Return nonzero if speculation TS needs a branchy recovery check. */
7696 ia64_needs_block_p (ds_t ts)
7698 if (ts & BEGIN_DATA)
7699 return !mflag_sched_spec_ldc;
7701 gcc_assert ((ts & BEGIN_CONTROL) != 0);
7703 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7706 /* Generate a recovery check for INSN speculated with types DS.
7707 If LABEL is nonzero, generate a branchy recovery check.
7708 Otherwise, generate a simple check. */
7710 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7712 rtx op1, pat, check_pat;
7713 gen_func_t gen_check;
7716 mode_no = get_mode_no_for_insn (insn);
7717 gcc_assert (mode_no >= 0);
7723 gcc_assert (!ia64_needs_block_p (ds));
7724 op1 = copy_rtx (recog_data.operand[1]);
7727 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7730 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
7732 pat = PATTERN (insn);
7733 if (GET_CODE (pat) == COND_EXEC)
7734 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7740 /* Return nonzero if X is a branchy recovery check. */
7742 ia64_spec_check_p (rtx x)
7745 if (GET_CODE (x) == COND_EXEC)
7746 x = COND_EXEC_CODE (x);
7747 if (GET_CODE (x) == SET)
7748 return ia64_spec_check_src_p (SET_SRC (x));
7752 /* Return nonzero if SRC belongs to a recovery check. */
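/* A recovery-check source is an IF_THEN_ELSE whose condition is an NE test
   of one of the LDC/CHKA/CHKS unspecs listed below. */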
7754 ia64_spec_check_src_p (rtx src)
7756 if (GET_CODE (src) == IF_THEN_ELSE)
7761 if (GET_CODE (t) == NE)
7765 if (GET_CODE (t) == UNSPEC)
7771 if (code == UNSPEC_LDCCLR
7772 || code == UNSPEC_LDCNC
7773 || code == UNSPEC_CHKACLR
7774 || code == UNSPEC_CHKANC
7775 || code == UNSPEC_CHKS)
7777 gcc_assert (code != 0);
7787 /* The following page contains abstract data `bundle states' which are
7788 used for bundling insns (inserting nops and template generation). */
7790 /* The following describes state of insn bundling. */
7794 /* Unique bundle state number to identify them in the debugging
7797 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7798 /* number nops before and after the insn */
7799 short before_nops_num, after_nops_num;
7800 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7802 int cost; /* cost of the state in cycles */
7803 int accumulated_insns_num; /* number of all previous insns including
7804 nops. L is considered as 2 insns */
7805 int branch_deviation; /* deviation of previous branches from 3rd slots */
7806 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
7807 struct bundle_state *next; /* next state with the same insn_num */
7808 struct bundle_state *originator; /* originator (previous insn state) */
7809 /* All bundle states are in the following chain. */
7810 struct bundle_state *allocated_states_chain;
7811 /* The DFA State after issuing the insn and the nops. */
7815 /* The following maps an insn number to the corresponding bundle state. */
7817 static struct bundle_state **index_to_bundle_states;
7819 /* The unique number of next bundle state. */
7821 static int bundle_states_num;
7823 /* All allocated bundle states are in the following chain. */
7825 static struct bundle_state *allocated_bundle_states_chain;
7827 /* All allocated but not used bundle states are in the following chain. */
7830 static struct bundle_state *free_bundle_state_chain;
7833 /* The following function returns a free bundle state. */
7835 static struct bundle_state *
7836 get_free_bundle_state (void)
7838 struct bundle_state *result;
7840 if (free_bundle_state_chain != NULL)
7842 result = free_bundle_state_chain;
7843 free_bundle_state_chain = result->next;
7847 result = XNEW (struct bundle_state);
7848 result->dfa_state = xmalloc (dfa_state_size);
7849 result->allocated_states_chain = allocated_bundle_states_chain;
7850 allocated_bundle_states_chain = result;
7852 result->unique_num = bundle_states_num++;
7857 /* The following function frees given bundle state. */
7860 free_bundle_state (struct bundle_state *state)
7862 state->next = free_bundle_state_chain;
7863 free_bundle_state_chain = state;
7866 /* Start work with abstract data `bundle states'. */
7869 initiate_bundle_states (void)
7871 bundle_states_num = 0;
7872 free_bundle_state_chain = NULL;
7873 allocated_bundle_states_chain = NULL;
7876 /* Finish work with abstract data `bundle states'. */
7879 finish_bundle_states (void)
7881 struct bundle_state *curr_state, *next_state;
7883 for (curr_state = allocated_bundle_states_chain;
7885 curr_state = next_state)
7887 next_state = curr_state->allocated_states_chain;
7888 free (curr_state->dfa_state);
7893 /* Hash table of the bundle states. The key is dfa_state and insn_num
7894 of the bundle states. */
7896 static htab_t bundle_state_table;
7898 /* The function returns hash of BUNDLE_STATE. */
7901 bundle_state_hash (const void *bundle_state)
7903 const struct bundle_state *const state
7904 = (const struct bundle_state *) bundle_state;
7907 for (result = i = 0; i < dfa_state_size; i++)
7908 result += (((unsigned char *) state->dfa_state) [i]
7909 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7910 return result + state->insn_num;
7913 /* The function returns nonzero if the bundle state keys are equal. */
7916 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7918 const struct bundle_state *const state1
7919 = (const struct bundle_state *) bundle_state_1;
7920 const struct bundle_state *const state2
7921 = (const struct bundle_state *) bundle_state_2;
7923 return (state1->insn_num == state2->insn_num
7924 && memcmp (state1->dfa_state, state2->dfa_state,
7925 dfa_state_size) == 0);
7928 /* The function inserts the BUNDLE_STATE into the hash table. The
7929 function returns nonzero if the bundle has been inserted into the
7930 table. The table contains the best bundle state with given key. */
7933 insert_bundle_state (struct bundle_state *bundle_state)
7937 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
7938 if (*entry_ptr == NULL)
7940 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7941 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7942 *entry_ptr = (void *) bundle_state;
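/* Otherwise keep whichever state is better, comparing lexicographically by
   cost, then accumulated_insns_num, then branch_deviation, then
   middle_bundle_stops. */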
7945 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7946 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7947 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7948 > bundle_state->accumulated_insns_num
7949 || (((struct bundle_state *)
7950 *entry_ptr)->accumulated_insns_num
7951 == bundle_state->accumulated_insns_num
7952 && (((struct bundle_state *)
7953 *entry_ptr)->branch_deviation
7954 > bundle_state->branch_deviation
7955 || (((struct bundle_state *)
7956 *entry_ptr)->branch_deviation
7957 == bundle_state->branch_deviation
7958 && ((struct bundle_state *)
7959 *entry_ptr)->middle_bundle_stops
7960 > bundle_state->middle_bundle_stops))))))
7963 struct bundle_state temp;
7965 temp = *(struct bundle_state *) *entry_ptr;
7966 *(struct bundle_state *) *entry_ptr = *bundle_state;
7967 ((struct bundle_state *) *entry_ptr)->next = temp.next;
7968 *bundle_state = temp;
7973 /* Start work with the hash table. */
7976 initiate_bundle_state_table (void)
7978 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7982 /* Finish work with the hash table. */
7985 finish_bundle_state_table (void)
7987 htab_delete (bundle_state_table);
7992 /* The following variable is an insn `nop' used to check bundle states
7993 with different numbers of inserted nops. */
7995 static rtx ia64_nop;
7997 /* The following function tries to issue NOPS_NUM nops for the current
7998 state without advancing the processor cycle. If it fails, the
7999 function returns FALSE and frees the current state. */
8002 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8006 for (i = 0; i < nops_num; i++)
8007 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8009 free_bundle_state (curr_state);
8015 /* The following function tries to issue INSN for the current
8016 state without advancing the processor cycle. If it fails, the
8017 function returns FALSE and frees the current state. */
8020 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8022 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8024 free_bundle_state (curr_state);
8030 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8031 starting with ORIGINATOR without advancing the processor cycle. If
8032 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8033 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8034 If successful, the function creates a new bundle state and
8035 inserts it into the hash table and into `index_to_bundle_states'. */
8038 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8039 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8041 struct bundle_state *curr_state;
8043 curr_state = get_free_bundle_state ();
8044 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8045 curr_state->insn = insn;
8046 curr_state->insn_num = originator->insn_num + 1;
8047 curr_state->cost = originator->cost;
8048 curr_state->originator = originator;
8049 curr_state->before_nops_num = before_nops_num;
8050 curr_state->after_nops_num = 0;
8051 curr_state->accumulated_insns_num
8052 = originator->accumulated_insns_num + before_nops_num;
8053 curr_state->branch_deviation = originator->branch_deviation;
8054 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8056 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8058 gcc_assert (GET_MODE (insn) != TImode);
8059 if (!try_issue_nops (curr_state, before_nops_num))
8061 if (!try_issue_insn (curr_state, insn))
8063 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8064 if (curr_state->accumulated_insns_num % 3 != 0)
8065 curr_state->middle_bundle_stops++;
8066 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8067 && curr_state->accumulated_insns_num % 3 != 0)
8069 free_bundle_state (curr_state);
8073 else if (GET_MODE (insn) != TImode)
8075 if (!try_issue_nops (curr_state, before_nops_num))
8077 if (!try_issue_insn (curr_state, insn))
8079 curr_state->accumulated_insns_num++;
8080 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8081 && asm_noperands (PATTERN (insn)) < 0);
8083 if (ia64_safe_type (insn) == TYPE_L)
8084 curr_state->accumulated_insns_num++;
8088 /* If this is an insn that must be first in a group, then don't allow
8089 nops to be emitted before it. Currently, alloc is the only such
8090 supported instruction. */
8091 /* ??? The bundling automatons should handle this for us, but they do
8092 not yet have support for the first_insn attribute. */
8093 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8095 free_bundle_state (curr_state);
8099 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8100 state_transition (curr_state->dfa_state, NULL);
8102 if (!try_issue_nops (curr_state, before_nops_num))
8104 if (!try_issue_insn (curr_state, insn))
8106 curr_state->accumulated_insns_num++;
8107 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8108 || asm_noperands (PATTERN (insn)) >= 0)
8110 /* Finish bundle containing asm insn. */
8111 curr_state->after_nops_num
8112 = 3 - curr_state->accumulated_insns_num % 3;
8113 curr_state->accumulated_insns_num
8114 += 3 - curr_state->accumulated_insns_num % 3;
8116 else if (ia64_safe_type (insn) == TYPE_L)
8117 curr_state->accumulated_insns_num++;
8119 if (ia64_safe_type (insn) == TYPE_B)
8120 curr_state->branch_deviation
8121 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8122 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8124 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8127 struct bundle_state *curr_state1;
8128 struct bundle_state *allocated_states_chain;
8130 curr_state1 = get_free_bundle_state ();
8131 dfa_state = curr_state1->dfa_state;
8132 allocated_states_chain = curr_state1->allocated_states_chain;
8133 *curr_state1 = *curr_state;
8134 curr_state1->dfa_state = dfa_state;
8135 curr_state1->allocated_states_chain = allocated_states_chain;
8136 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8138 curr_state = curr_state1;
8140 if (!try_issue_nops (curr_state,
8141 3 - curr_state->accumulated_insns_num % 3))
8143 curr_state->after_nops_num
8144 = 3 - curr_state->accumulated_insns_num % 3;
8145 curr_state->accumulated_insns_num
8146 += 3 - curr_state->accumulated_insns_num % 3;
8148 if (!insert_bundle_state (curr_state))
8149 free_bundle_state (curr_state);
8153 /* The following function returns the position in the two-bundle window for the given STATE. */
8157 get_max_pos (state_t state)
8159 if (cpu_unit_reservation_p (state, pos_6))
8161 else if (cpu_unit_reservation_p (state, pos_5))
8163 else if (cpu_unit_reservation_p (state, pos_4))
8165 else if (cpu_unit_reservation_p (state, pos_3))
8167 else if (cpu_unit_reservation_p (state, pos_2))
8169 else if (cpu_unit_reservation_p (state, pos_1))
8175 /* The function returns code of a possible template for given position
8176 and state. The function should be called only with position values
8177 equal to 3 or 6. We avoid generating F NOPs by putting
8178 templates containing F insns at the end of the template search
8179 because of an undocumented anomaly in McKinley-derived cores which can
8180 cause stalls if an F-unit insn (including a NOP) is issued within a
8181 six-cycle window after reading certain application registers (such
8182 as ar.bsp). Furthermore, power considerations also argue against
8183 the use of F-unit instructions unless they're really needed. */
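/* The value returned here is the bundle template number that is later passed
   to gen_bundle_selector by ia64_add_bundle_selector_before. */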
8186 get_template (state_t state, int pos)
8191 if (cpu_unit_reservation_p (state, _0mmi_))
8193 else if (cpu_unit_reservation_p (state, _0mii_))
8195 else if (cpu_unit_reservation_p (state, _0mmb_))
8197 else if (cpu_unit_reservation_p (state, _0mib_))
8199 else if (cpu_unit_reservation_p (state, _0mbb_))
8201 else if (cpu_unit_reservation_p (state, _0bbb_))
8203 else if (cpu_unit_reservation_p (state, _0mmf_))
8205 else if (cpu_unit_reservation_p (state, _0mfi_))
8207 else if (cpu_unit_reservation_p (state, _0mfb_))
8209 else if (cpu_unit_reservation_p (state, _0mlx_))
8214 if (cpu_unit_reservation_p (state, _1mmi_))
8216 else if (cpu_unit_reservation_p (state, _1mii_))
8218 else if (cpu_unit_reservation_p (state, _1mmb_))
8220 else if (cpu_unit_reservation_p (state, _1mib_))
8222 else if (cpu_unit_reservation_p (state, _1mbb_))
8224 else if (cpu_unit_reservation_p (state, _1bbb_))
8226 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8228 else if (cpu_unit_reservation_p (state, _1mfi_))
8230 else if (cpu_unit_reservation_p (state, _1mfb_))
8232 else if (cpu_unit_reservation_p (state, _1mlx_))
8241 /* True when INSN is important for bundling. */
8243 important_for_bundling_p (rtx insn)
8245 return (INSN_P (insn)
8246 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8247 && GET_CODE (PATTERN (insn)) != USE
8248 && GET_CODE (PATTERN (insn)) != CLOBBER);
8251 /* The following function returns the first insn important for insn bundling
8252 at or after INSN and before TAIL. */
8255 get_next_important_insn (rtx insn, rtx tail)
8257 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8258 if (important_for_bundling_p (insn))
8263 /* Add a bundle selector TEMPLATE0 before INSN. */
8266 ia64_add_bundle_selector_before (int template0, rtx insn)
8268 rtx b = gen_bundle_selector (GEN_INT (template0));
8270 ia64_emit_insn_before (b, insn);
8271 #if NR_BUNDLES == 10
8272 if ((template0 == 4 || template0 == 5)
8273 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8276 rtx note = NULL_RTX;
8278 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8279 first or second slot. If it is and has REG_EH_NOTE set, copy it
8280 to following nops, as br.call sets rp to the address of following
8281 bundle and therefore an EH region end must be on a bundle boundary. */
8283 insn = PREV_INSN (insn);
8284 for (i = 0; i < 3; i++)
8287 insn = next_active_insn (insn);
8288 while (GET_CODE (insn) == INSN
8289 && get_attr_empty (insn) == EMPTY_YES);
8290 if (GET_CODE (insn) == CALL_INSN)
8291 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8296 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8297 || code == CODE_FOR_nop_b);
8298 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8301 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8308 /* The following function does insn bundling. Bundling means
8309 inserting templates and nop insns to fit insn groups into permitted
8310 templates. Instruction scheduling uses NDFA (non-deterministic
8311 finite automata) encoding information about the templates and the
8312 inserted nops. Nondeterminism of the automata permits following
8313 all possible insn sequences very quickly.
8315 Unfortunately it is not possible to get information about inserting
8316 nop insns and used templates from the automata states. The
8317 automata only say that we can issue an insn, possibly inserting
8318 some nops before it and using some template. Therefore insn
8319 bundling in this function is implemented by using a DFA
8320 (deterministic finite automaton). We follow all possible insn
8321 sequences by inserting 0-2 nops (that is what the NDFA describes for
8322 insn scheduling) before/after each insn being bundled. We know the
8323 start of simulated processor cycle from insn scheduling (insn
8324 starting a new cycle has TImode).
8326 A simple implementation of insn bundling would create an enormous
8327 number of possible insn sequences satisfying the information about new
8328 cycle ticks taken from the insn scheduling. To make the algorithm
8329 practical we use dynamic programming. Each decision (about
8330 inserting nops and implicitly about previous decisions) is described
8331 by structure bundle_state (see above). If we generate the same
8332 bundle state (key is automaton state after issuing the insns and
8333 nops for it), we reuse the already generated one. As a consequence we
8334 reject some decisions which cannot improve the solution and
8335 reduce the memory needed by the algorithm.
8337 When we reach the end of EBB (extended basic block), we choose the
8338 best sequence and then, moving back in EBB, insert templates for
8339 the best alternative. The templates are taken from querying
8340 automaton state for each insn in chosen bundle states.
8342 So the algorithm makes two (forward and backward) passes through
8343 EBB. There is an additional forward pass through the EBB for the Itanium1
8344 processor. This pass inserts more nops to make the dependency between
8345 a producer insn and an MMMUL/MMSHF insn at least 4 cycles long. */
8348 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8350 struct bundle_state *curr_state, *next_state, *best_state;
8351 rtx insn, next_insn;
8353 int i, bundle_end_p, only_bundle_end_p, asm_p;
8354 int pos = 0, max_pos, template0, template1;
8357 enum attr_type type;
8360 /* Count insns in the EBB. */
8361 for (insn = NEXT_INSN (prev_head_insn);
8362 insn && insn != tail;
8363 insn = NEXT_INSN (insn))
8369 dfa_clean_insn_cache ();
8370 initiate_bundle_state_table ();
8371 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8372 /* First (forward) pass -- generation of bundle states. */
8373 curr_state = get_free_bundle_state ();
8374 curr_state->insn = NULL;
8375 curr_state->before_nops_num = 0;
8376 curr_state->after_nops_num = 0;
8377 curr_state->insn_num = 0;
8378 curr_state->cost = 0;
8379 curr_state->accumulated_insns_num = 0;
8380 curr_state->branch_deviation = 0;
8381 curr_state->middle_bundle_stops = 0;
8382 curr_state->next = NULL;
8383 curr_state->originator = NULL;
8384 state_reset (curr_state->dfa_state);
8385 index_to_bundle_states [0] = curr_state;
8387 /* Shift cycle mark if it is put on insn which could be ignored. */
8388 for (insn = NEXT_INSN (prev_head_insn);
8390 insn = NEXT_INSN (insn))
8392 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8393 || GET_CODE (PATTERN (insn)) == USE
8394 || GET_CODE (PATTERN (insn)) == CLOBBER)
8395 && GET_MODE (insn) == TImode)
8397 PUT_MODE (insn, VOIDmode);
8398 for (next_insn = NEXT_INSN (insn);
8400 next_insn = NEXT_INSN (next_insn))
8401 if (INSN_P (next_insn)
8402 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8403 && GET_CODE (PATTERN (next_insn)) != USE
8404 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8405 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8407 PUT_MODE (next_insn, TImode);
8411 /* Forward pass: generation of bundle states. */
8412 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8416 gcc_assert (INSN_P (insn)
8417 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8418 && GET_CODE (PATTERN (insn)) != USE
8419 && GET_CODE (PATTERN (insn)) != CLOBBER);
8420 type = ia64_safe_type (insn);
8421 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8423 index_to_bundle_states [insn_num] = NULL;
8424 for (curr_state = index_to_bundle_states [insn_num - 1];
8426 curr_state = next_state)
8428 pos = curr_state->accumulated_insns_num % 3;
8429 next_state = curr_state->next;
8430 /* We must fill up the current bundle in order to start a
8431 subsequent asm insn in a new bundle. Asm insn is always
8432 placed in a separate bundle. */
8434 = (next_insn != NULL_RTX
8435 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8436 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8437 /* We may fill up the current bundle if it is the cycle end
8438 without a group barrier. */
8440 = (only_bundle_end_p || next_insn == NULL_RTX
8441 || (GET_MODE (next_insn) == TImode
8442 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8443 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8445 /* We need to insert 2 nops for cases like M_MII. To
8446 guarantee issuing all insns on the same cycle for
8447 Itanium 1, we need to issue 2 nops after the first M
8448 insn (MnnMII where n is a nop insn). */
8449 || ((type == TYPE_M || type == TYPE_A)
8450 && ia64_tune == PROCESSOR_ITANIUM
8451 && !bundle_end_p && pos == 1))
8452 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8454 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8456 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8459 gcc_assert (index_to_bundle_states [insn_num]);
8460 for (curr_state = index_to_bundle_states [insn_num];
8462 curr_state = curr_state->next)
8463 if (verbose >= 2 && dump)
8465 /* This structure is taken from generated code of the
8466 pipeline hazard recognizer (see file insn-attrtab.c).
8467 Please don't forget to change the structure if a new
8468 automaton is added to .md file. */
8471 unsigned short one_automaton_state;
8472 unsigned short oneb_automaton_state;
8473 unsigned short two_automaton_state;
8474 unsigned short twob_automaton_state;
8479 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8480 curr_state->unique_num,
8481 (curr_state->originator == NULL
8482 ? -1 : curr_state->originator->unique_num),
8484 curr_state->before_nops_num, curr_state->after_nops_num,
8485 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8486 curr_state->middle_bundle_stops,
8487 (ia64_tune == PROCESSOR_ITANIUM
8488 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8489 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8494 /* We should find a solution because the 2nd insn scheduling has found one. */
8496 gcc_assert (index_to_bundle_states [insn_num]);
8497 /* Find a state corresponding to the best insn sequence. */
8499 for (curr_state = index_to_bundle_states [insn_num];
8501 curr_state = curr_state->next)
8502 /* We are just looking at the states with a fully filled up last
8503 bundle. First we prefer insn sequences with minimal cost,
8504 then with minimal inserted nops, and finally with branch insns
8505 placed in the 3rd slots. */
8506 if (curr_state->accumulated_insns_num % 3 == 0
8507 && (best_state == NULL || best_state->cost > curr_state->cost
8508 || (best_state->cost == curr_state->cost
8509 && (curr_state->accumulated_insns_num
8510 < best_state->accumulated_insns_num
8511 || (curr_state->accumulated_insns_num
8512 == best_state->accumulated_insns_num
8513 && (curr_state->branch_deviation
8514 < best_state->branch_deviation
8515 || (curr_state->branch_deviation
8516 == best_state->branch_deviation
8517 && curr_state->middle_bundle_stops
8518 < best_state->middle_bundle_stops)))))))
8519 best_state = curr_state;
8520 /* Second (backward) pass: adding nops and templates. */
8521 gcc_assert (best_state);
8522 insn_num = best_state->before_nops_num;
8523 template0 = template1 = -1;
8524 for (curr_state = best_state;
8525 curr_state->originator != NULL;
8526 curr_state = curr_state->originator)
8528 insn = curr_state->insn;
8529 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8530 || asm_noperands (PATTERN (insn)) >= 0);
8532 if (verbose >= 2 && dump)
8536 unsigned short one_automaton_state;
8537 unsigned short oneb_automaton_state;
8538 unsigned short two_automaton_state;
8539 unsigned short twob_automaton_state;
8544 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8545 curr_state->unique_num,
8546 (curr_state->originator == NULL
8547 ? -1 : curr_state->originator->unique_num),
8549 curr_state->before_nops_num, curr_state->after_nops_num,
8550 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8551 curr_state->middle_bundle_stops,
8552 (ia64_tune == PROCESSOR_ITANIUM
8553 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
8554 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
8557 /* Find the position in the current bundle window. The window can
8558 contain at most two bundles. A two-bundle window means that
8559 the processor will make two bundle rotations. */
8560 max_pos = get_max_pos (curr_state->dfa_state);
8562 /* The following (negative template number) means that the
8563 processor did one bundle rotation. */
8564 || (max_pos == 3 && template0 < 0))
8566 /* We are at the end of the window -- find template(s) for its bundle(s). */
8570 template0 = get_template (curr_state->dfa_state, 3);
8573 template1 = get_template (curr_state->dfa_state, 3);
8574 template0 = get_template (curr_state->dfa_state, 6);
8577 if (max_pos > 3 && template1 < 0)
8578 /* It may happen when we have the stop inside a bundle. */
8580 gcc_assert (pos <= 3);
8581 template1 = get_template (curr_state->dfa_state, 3);
8585 /* Emit nops after the current insn. */
8586 for (i = 0; i < curr_state->after_nops_num; i++)
8589 emit_insn_after (nop, insn);
8591 gcc_assert (pos >= 0);
8594 /* We are at the start of a bundle: emit the template
8595 (it should be defined). */
8596 gcc_assert (template0 >= 0);
8597 ia64_add_bundle_selector_before (template0, nop);
8598 /* If we have a two-bundle window, we make one bundle
8599 rotation. Otherwise template0 will be undefined
8600 (negative value). */
8601 template0 = template1;
8605 /* Move the position backward in the window. A group barrier has
8606 no slot. An asm insn takes the whole bundle. */
8607 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8608 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8609 && asm_noperands (PATTERN (insn)) < 0)
8611 /* Long insn takes 2 slots. */
8612 if (ia64_safe_type (insn) == TYPE_L)
8614 gcc_assert (pos >= 0);
8616 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8617 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8618 && asm_noperands (PATTERN (insn)) < 0)
8620 /* The current insn is at the bundle start: emit the template. */
8622 gcc_assert (template0 >= 0);
8623 ia64_add_bundle_selector_before (template0, insn);
8624 b = PREV_INSN (insn);
8626 /* See comment above in analogous place for emitting nops after the insn. */
8628 template0 = template1;
8631 /* Emit nops before the current insn. */
8632 for (i = 0; i < curr_state->before_nops_num; i++)
8635 ia64_emit_insn_before (nop, insn);
8636 nop = PREV_INSN (insn);
8639 gcc_assert (pos >= 0);
8642 /* See comment above in analogous place for emitting nops after the insn. */
8644 gcc_assert (template0 >= 0);
8645 ia64_add_bundle_selector_before (template0, insn);
8646 b = PREV_INSN (insn);
8648 template0 = template1;
8653 if (ia64_tune == PROCESSOR_ITANIUM)
8654 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8655 Itanium1 has a strange design: if the distance between an insn
8656 and a dependent MM-insn is less than 4 cycles then we incur an additional
8657 6-cycle stall. So we make the distance equal to 4 cycles if it is less. */
8659 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8663 gcc_assert (INSN_P (insn)
8664 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8665 && GET_CODE (PATTERN (insn)) != USE
8666 && GET_CODE (PATTERN (insn)) != CLOBBER);
8667 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8668 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8669 /* We found an MM-insn which needs additional cycles. */
8675 /* Now we are searching for a template of the bundle in
8676 which the MM-insn is placed and the position of the
8677 insn in the bundle (0, 1, 2). We also check
8678 whether there is a stop before the insn. */
8679 last = prev_active_insn (insn);
8680 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8682 last = prev_active_insn (last);
8684 for (;; last = prev_active_insn (last))
8685 if (recog_memoized (last) == CODE_FOR_bundle_selector)
8687 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8689 /* The insn is in an MLX bundle. Change the template
8690 to MFI because we will add nops before the
8691 insn. It simplifies subsequent code a lot. */
8693 = gen_bundle_selector (const2_rtx); /* -> MFI */
8696 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8697 && (ia64_safe_itanium_class (last)
8698 != ITANIUM_CLASS_IGNORE))
8700 /* Sanity checks: the stop is not at the
8701 bundle start, there are no more than 3 insns in the bundle,
8702 and the MM-insn is not at the start of a bundle with template L. */
8704 gcc_assert ((!pred_stop_p || n)
8706 && (template0 != 9 || !n));
8707 /* Put nops after the insn in the bundle. */
8708 for (j = 3 - n; j > 0; j --)
8709 ia64_emit_insn_before (gen_nop (), insn);
8710 /* This takes into account that we will add N more nops
8711 before the insn later -- see the code below. */
8712 add_cycles [INSN_UID (insn)]--;
8713 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8714 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8717 add_cycles [INSN_UID (insn)]--;
8718 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8720 /* Insert "MII;" template. */
8721 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8723 ia64_emit_insn_before (gen_nop (), insn);
8724 ia64_emit_insn_before (gen_nop (), insn);
8727 /* To decrease code size, we use the "MI;I;" template. */
8729 ia64_emit_insn_before
8730 (gen_insn_group_barrier (GEN_INT (3)), insn);
8733 ia64_emit_insn_before (gen_nop (), insn);
8734 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8737 /* Put the MM-insn in the same slot of a bundle with the
8738 same template as the original one. */
8739 ia64_add_bundle_selector_before (template0, insn);
8740 /* To put the insn in the same slot, add the necessary number of nops. */
8742 for (j = n; j > 0; j --)
8743 ia64_emit_insn_before (gen_nop (), insn);
8744 /* Emit the stop if the original bundle had one. */
8746 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8751 #ifdef ENABLE_CHECKING
8753 /* Check that middle_bundle_stops was computed correctly. */
8754 int num = best_state->middle_bundle_stops;
8755 bool start_bundle = true, end_bundle = false;
8757 for (insn = NEXT_INSN (prev_head_insn);
8758 insn && insn != tail;
8759 insn = NEXT_INSN (insn))
8763 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8764 start_bundle = true;
8769 for (next_insn = NEXT_INSN (insn);
8770 next_insn && next_insn != tail;
8771 next_insn = NEXT_INSN (next_insn))
8772 if (INSN_P (next_insn)
8773 && (ia64_safe_itanium_class (next_insn)
8774 != ITANIUM_CLASS_IGNORE
8775 || recog_memoized (next_insn)
8776 == CODE_FOR_bundle_selector)
8777 && GET_CODE (PATTERN (next_insn)) != USE
8778 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8781 end_bundle = next_insn == NULL_RTX
8782 || next_insn == tail
8783 || (INSN_P (next_insn)
8784 && recog_memoized (next_insn)
8785 == CODE_FOR_bundle_selector);
8786 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8787 && !start_bundle && !end_bundle
8789 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8790 && asm_noperands (PATTERN (next_insn)) < 0)
8793 start_bundle = false;
8797 gcc_assert (num == 0);
8801 free (index_to_bundle_states);
8802 finish_bundle_state_table ();
8804 dfa_clean_insn_cache ();
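/* Illustrative sketch (not compiled as part of this file): how a bundle
   template index relates to the three instruction slots that bundling ()
   fills above.  Each IA-64 bundle is 16 bytes and holds three slots, and
   a long (L-type) insn occupies two of them.  The pairings 0 -> MII,
   2 -> MFI and 9 -> MLX follow the comments in this function; the
   remaining entries list the standard IA-64 template set and are
   assumptions that may not match the exact encoding used elsewhere.  */
#if 0
#include <stdio.h>

static const char *const example_bundle_templates[] = {
  "MII", "MMI", "MFI", "MMF", "BBB", "MBB", "MIB", "MMB", "MFB", "MLX"
};

/* Print the slot types selected by a template index.  */
static void
example_print_template (int template_index)
{
  if (template_index < 0 || template_index > 9)
    printf ("template %d: unknown\n", template_index);
  else
    printf ("template %d: %s\n", template_index,
            example_bundle_templates[template_index]);
}
#endif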
8807 /* The following function is called at the end of scheduling BB or
8808 EBB. After reload, it inserts stop bits and does insn bundling. */
8811 ia64_sched_finish (FILE *dump, int sched_verbose)
8814 fprintf (dump, "// Finishing schedule.\n");
8815 if (!reload_completed)
8817 if (reload_completed)
8819 final_emit_insn_group_barriers (dump);
8820 bundling (dump, sched_verbose, current_sched_info->prev_head,
8821 current_sched_info->next_tail);
8822 if (sched_verbose && dump)
8823 fprintf (dump, "// finishing %d-%d\n",
8824 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8825 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8831 /* The following function inserts stop bits in a scheduled BB or EBB. */
8834 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8837 int need_barrier_p = 0;
8838 int seen_good_insn = 0;
8839 rtx prev_insn = NULL_RTX;
8841 init_insn_group_barriers ();
8843 for (insn = NEXT_INSN (current_sched_info->prev_head);
8844 insn != current_sched_info->next_tail;
8845 insn = NEXT_INSN (insn))
8847 if (GET_CODE (insn) == BARRIER)
8849 rtx last = prev_active_insn (insn);
8853 if (GET_CODE (last) == JUMP_INSN
8854 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8855 last = prev_active_insn (last);
8856 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8857 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8859 init_insn_group_barriers ();
8862 prev_insn = NULL_RTX;
8864 else if (INSN_P (insn))
8866 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8868 init_insn_group_barriers ();
8871 prev_insn = NULL_RTX;
8873 else if (need_barrier_p || group_barrier_needed (insn)
8874 || (mflag_sched_stop_bits_after_every_cycle
8875 && GET_MODE (insn) == TImode
8878 if (TARGET_EARLY_STOP_BITS)
8883 last != current_sched_info->prev_head;
8884 last = PREV_INSN (last))
8885 if (INSN_P (last) && GET_MODE (last) == TImode
8886 && stops_p [INSN_UID (last)])
8888 if (last == current_sched_info->prev_head)
8890 last = prev_active_insn (last);
8892 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8893 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8895 init_insn_group_barriers ();
8896 for (last = NEXT_INSN (last);
8898 last = NEXT_INSN (last))
8901 group_barrier_needed (last);
8902 if (recog_memoized (last) >= 0
8903 && important_for_bundling_p (last))
8909 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8911 init_insn_group_barriers ();
8914 group_barrier_needed (insn);
8915 if (recog_memoized (insn) >= 0
8916 && important_for_bundling_p (insn))
8918 prev_insn = NULL_RTX;
8920 else if (recog_memoized (insn) >= 0
8921 && important_for_bundling_p (insn))
8926 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8927 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8928 || asm_noperands (PATTERN (insn)) >= 0);
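/* Illustrative sketch (not compiled as part of this file): an insn group
   barrier ultimately appears in the assembly output as the IA-64 stop
   bit ";;", which ends the current instruction group, e.g.

       add r14 = r32, r33 ;;   // stop: r14 is written in this group
       ld8 r15 = [r14]         // ... and read in the next one

   The helper below only shows the expected textual form; the exact
   output template lives in ia64.md and is assumed here.  */
#if 0
#include <stdio.h>

static void
example_emit_stop_bit (FILE *f)
{
  fputs ("\t;;\n", f);
}
#endif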
8935 /* If the following function returns TRUE, we will use the DFA
8939 ia64_first_cycle_multipass_dfa_lookahead (void)
8941 return (reload_completed ? 6 : 4);
8944 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
8947 ia64_init_dfa_pre_cycle_insn (void)
8949 if (temp_dfa_state == NULL)
8951 dfa_state_size = state_size ();
8952 temp_dfa_state = xmalloc (dfa_state_size);
8953 prev_cycle_state = xmalloc (dfa_state_size);
8955 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8956 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8957 recog_memoized (dfa_pre_cycle_insn);
8958 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8959 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8960 recog_memoized (dfa_stop_insn);
8963 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8964 used by the DFA insn scheduler. */
8967 ia64_dfa_pre_cycle_insn (void)
8969 return dfa_pre_cycle_insn;
8972 /* The following function returns TRUE if PRODUCER (of type ilog or
8973 ld) produces an address for CONSUMER (of type st or stf). */
8976 ia64_st_address_bypass_p (rtx producer, rtx consumer)
8980 gcc_assert (producer && consumer);
8981 dest = ia64_single_set (producer);
8983 reg = SET_DEST (dest);
8985 if (GET_CODE (reg) == SUBREG)
8986 reg = SUBREG_REG (reg);
8987 gcc_assert (GET_CODE (reg) == REG);
8989 dest = ia64_single_set (consumer);
8991 mem = SET_DEST (dest);
8992 gcc_assert (mem && GET_CODE (mem) == MEM);
8993 return reg_mentioned_p (reg, mem);
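/* Illustrative sketch (not compiled as part of this file): the bypass
   above matches a producer/consumer pair in which the register written
   by the producer is used in the consumer's store address, e.g.

       add r14 = r32, r33      // producer: writes r14
       st8 [r14] = r35         // consumer: r14 forms the address

   The toy walker below mimics what reg_mentioned_p checks for: whether a
   given register id occurs anywhere in a small expression tree.  The
   struct and function names are invented for the example.  */
#if 0
struct toy_expr
{
  int reg;                      /* register id, or -1 for an interior node */
  struct toy_expr *op[2];       /* up to two operands, NULL if absent */
};

static int
toy_reg_mentioned_p (int reg, const struct toy_expr *x)
{
  if (x == NULL)
    return 0;
  if (x->reg == reg)
    return 1;
  return toy_reg_mentioned_p (reg, x->op[0])
         || toy_reg_mentioned_p (reg, x->op[1]);
}
#endif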
8996 /* The following function returns TRUE if PRODUCER (of type ilog or
8997 ld) produces an address for CONSUMER (of type ld or fld). */
9000 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9002 rtx dest, src, reg, mem;
9004 gcc_assert (producer && consumer);
9005 dest = ia64_single_set (producer);
9007 reg = SET_DEST (dest);
9009 if (GET_CODE (reg) == SUBREG)
9010 reg = SUBREG_REG (reg);
9011 gcc_assert (GET_CODE (reg) == REG);
9013 src = ia64_single_set (consumer);
9015 mem = SET_SRC (src);
9018 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9019 mem = XVECEXP (mem, 0, 0);
9020 else if (GET_CODE (mem) == IF_THEN_ELSE)
9021 /* ??? Is this bypass necessary for ld.c? */
9023 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9024 mem = XEXP (mem, 1);
9027 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9028 mem = XEXP (mem, 0);
9030 if (GET_CODE (mem) == UNSPEC)
9032 int c = XINT (mem, 1);
9034 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9035 || c == UNSPEC_LDSA);
9036 mem = XVECEXP (mem, 0, 0);
9039 /* Note that LO_SUM is used for GOT loads. */
9040 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9042 return reg_mentioned_p (reg, mem);
9045 /* The following function returns TRUE if INSN produces an address for a
9046 load/store insn. We place such insns into an M slot because that
9047 decreases their latency. */
9050 ia64_produce_address_p (rtx insn)
9056 /* Emit pseudo-ops for the assembler to describe predicate relations.
9057 At present this assumes that we only consider predicate pairs to
9058 be mutex, and that the assembler can deduce proper values from
9059 straight-line code. */
9062 emit_predicate_relation_info (void)
9066 FOR_EACH_BB_REVERSE (bb)
9069 rtx head = BB_HEAD (bb);
9071 /* We only need such notes at code labels. */
9072 if (GET_CODE (head) != CODE_LABEL)
9074 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9075 head = NEXT_INSN (head);
9077 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9078 grabbing the entire block of predicate registers. */
9079 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9080 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9082 rtx p = gen_rtx_REG (BImode, r);
9083 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9084 if (head == BB_END (bb))
9090 /* Look for conditional calls that do not return, and protect predicate
9091 relations around them. Otherwise the assembler will assume the call
9092 returns, and complain about uses of call-clobbered predicates after the call. */
9094 FOR_EACH_BB_REVERSE (bb)
9096 rtx insn = BB_HEAD (bb);
9100 if (GET_CODE (insn) == CALL_INSN
9101 && GET_CODE (PATTERN (insn)) == COND_EXEC
9102 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9104 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9105 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9106 if (BB_HEAD (bb) == insn)
9108 if (BB_END (bb) == insn)
9112 if (insn == BB_END (bb))
9114 insn = NEXT_INSN (insn);
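/* Illustrative sketch (not compiled as part of this file): the
   pred_rel_mutex and safe_across_calls patterns emitted above expand to
   assembler pseudo-ops such as the ones printed below.  The exact
   spellings are taken from the GNU assembler's IA-64 directives and are
   assumptions here; the authoritative templates live in ia64.md.  */
#if 0
#include <stdio.h>

static void
example_predicate_directives (FILE *f)
{
  /* Tell the assembler that p6 and p7 are mutually exclusive.  */
  fputs ("\t.pred.rel \"mutex\", p6, p7\n", f);
  /* State which predicates may be relied upon across a call.  */
  fputs ("\t.pred.safe_across_calls p1-p5,p16-p63\n", f);
}
#endif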
9119 /* Perform machine dependent operations on the rtl chain INSNS. */
9124 /* We are freeing block_for_insn in the toplev to keep compatibility
9125 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9126 compute_bb_for_insn ();
9128 /* If optimizing, we'll have split before scheduling. */
9132 if (optimize && ia64_flag_schedule_insns2
9133 && dbg_cnt (ia64_sched2))
9135 timevar_push (TV_SCHED2);
9136 ia64_final_schedule = 1;
9138 initiate_bundle_states ();
9139 ia64_nop = make_insn_raw (gen_nop ());
9140 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9141 recog_memoized (ia64_nop);
9142 clocks_length = get_max_uid () + 1;
9143 stops_p = XCNEWVEC (char, clocks_length);
9144 if (ia64_tune == PROCESSOR_ITANIUM)
9146 clocks = XCNEWVEC (int, clocks_length);
9147 add_cycles = XCNEWVEC (int, clocks_length);
9149 if (ia64_tune == PROCESSOR_ITANIUM2)
9151 pos_1 = get_cpu_unit_code ("2_1");
9152 pos_2 = get_cpu_unit_code ("2_2");
9153 pos_3 = get_cpu_unit_code ("2_3");
9154 pos_4 = get_cpu_unit_code ("2_4");
9155 pos_5 = get_cpu_unit_code ("2_5");
9156 pos_6 = get_cpu_unit_code ("2_6");
9157 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9158 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9159 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9160 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9161 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9162 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9163 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9164 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9165 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9166 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9167 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9168 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9169 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9170 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9171 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9172 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9173 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9174 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9175 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9176 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9180 pos_1 = get_cpu_unit_code ("1_1");
9181 pos_2 = get_cpu_unit_code ("1_2");
9182 pos_3 = get_cpu_unit_code ("1_3");
9183 pos_4 = get_cpu_unit_code ("1_4");
9184 pos_5 = get_cpu_unit_code ("1_5");
9185 pos_6 = get_cpu_unit_code ("1_6");
9186 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9187 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9188 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9189 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9190 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9191 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9192 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9193 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9194 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9195 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9196 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9197 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9198 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9199 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9200 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9201 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9202 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9203 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9204 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9205 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9208 if (flag_selective_scheduling2
9209 && !maybe_skip_selective_scheduling ())
9210 run_selective_scheduling ();
9214 /* Redo the alignment computation, as it might have gone wrong. */
9215 compute_alignments ();
9217 /* We cannot reuse this one because it has been corrupted by the
9219 finish_bundle_states ();
9220 if (ia64_tune == PROCESSOR_ITANIUM)
9227 emit_insn_group_barriers (dump_file);
9229 ia64_final_schedule = 0;
9230 timevar_pop (TV_SCHED2);
9233 emit_all_insn_group_barriers (dump_file);
9237 /* A call must not be the last instruction in a function, so that the
9238 return address remains within the function and unwinding works
9239 properly. Note that IA-64 differs from dwarf2 on this point. */
9240 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9245 insn = get_last_insn ();
9246 if (! INSN_P (insn))
9247 insn = prev_active_insn (insn);
9248 /* Skip over insns that expand to nothing. */
9249 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
9251 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9252 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9254 insn = prev_active_insn (insn);
9256 if (GET_CODE (insn) == CALL_INSN)
9259 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9260 emit_insn (gen_break_f ());
9261 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9265 emit_predicate_relation_info ();
9267 if (ia64_flag_var_tracking)
9269 timevar_push (TV_VAR_TRACKING);
9270 variable_tracking_main ();
9271 timevar_pop (TV_VAR_TRACKING);
9273 df_finish_pass (false);
9276 /* Return true if REGNO is used by the epilogue. */
9279 ia64_epilogue_uses (int regno)
9284 /* With a call to a function in another module, we will write a new
9285 value to "gp". After returning from such a call, we need to make
9286 sure the function restores the original gp-value, even if the
9287 function itself does not use the gp anymore. */
9288 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9290 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9291 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9292 /* For functions defined with the syscall_linkage attribute, all
9293 input registers are marked as live at all function exits. This
9294 prevents the register allocator from using the input registers,
9295 which in turn makes it possible to restart a system call after
9296 an interrupt without having to save/restore the input registers.
9297 This also prevents kernel data from leaking to application code. */
9298 return lookup_attribute ("syscall_linkage",
9299 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9302 /* Conditional return patterns can't represent the use of `b0' as
9303 the return address, so we force the value live this way. */
9307 /* Likewise for ar.pfs, which is used by br.ret. */
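/* Illustrative sketch (not compiled as part of this file): user-level use
   of the syscall_linkage attribute handled above.  The function name is
   invented; the attribute keeps all input registers live at every exit of
   the annotated function, as described in the comment above.  */
#if 0
long example_syscall_entry (long arg0, long arg1)
  __attribute__ ((syscall_linkage));

/* Definition of a function implementing a restartable system call.  */
long
example_syscall_entry (long arg0, long arg1)
{
  return arg0 + arg1;
}
#endif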
9315 /* Return true if REGNO is used by the frame unwinder. */
9318 ia64_eh_uses (int regno)
9320 enum ia64_frame_regs r;
9322 if (! reload_completed)
9328 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9329 if (regno == current_frame_info.r[r]
9330 || regno == emitted_frame_related_regs[r])
9336 /* Return true if this goes in small data/bss. */
9338 /* ??? We could also support our own long data here, generating movl/add/ld8
9339 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9340 code faster because there is one less load. This also includes incomplete
9341 types which can't go in sdata/sbss. */
9344 ia64_in_small_data_p (const_tree exp)
9346 if (TARGET_NO_SDATA)
9349 /* We want to merge strings, so we never consider them small data. */
9350 if (TREE_CODE (exp) == STRING_CST)
9353 /* Functions are never small data. */
9354 if (TREE_CODE (exp) == FUNCTION_DECL)
9357 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9359 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9361 if (strcmp (section, ".sdata") == 0
9362 || strncmp (section, ".sdata.", 7) == 0
9363 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9364 || strcmp (section, ".sbss") == 0
9365 || strncmp (section, ".sbss.", 6) == 0
9366 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9371 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9373 /* If this is an incomplete type with size 0, then we can't put it
9374 in sdata because it might be too big when completed. */
9375 if (size > 0 && size <= ia64_section_threshold)
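/* Illustrative sketch (not compiled as part of this file): a standalone
   restatement of the section-name test used above, for readers who want
   to check a name outside of GCC.  The helper name is invented.  */
#if 0
#include <string.h>

static int
example_small_section_name_p (const char *section)
{
  return strcmp (section, ".sdata") == 0
         || strncmp (section, ".sdata.", 7) == 0
         || strncmp (section, ".gnu.linkonce.s.", 16) == 0
         || strcmp (section, ".sbss") == 0
         || strncmp (section, ".sbss.", 6) == 0
         || strncmp (section, ".gnu.linkonce.sb.", 17) == 0;
}
#endif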
9382 /* Output assembly directives for prologue regions. */
9384 /* True if the current basic block is the last block of the function. */
9386 static bool last_block;
9388 /* True if we need a copy_state command at the start of the next block. */
9390 static bool need_copy_state;
9392 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9393 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9396 /* Emit a debugging label after a call-frame-related insn. We'd
9397 rather output the label right away, but we'd have to output it
9398 after, not before, the instruction, and the instruction has not
9399 been output yet. So we emit the label after the insn, delete it to
9400 avoid introducing basic blocks, and mark it as preserved, such that
9401 it is still output, given that it is referenced in debug info. */
9404 ia64_emit_deleted_label_after_insn (rtx insn)
9406 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9407 rtx lb = gen_label_rtx ();
9408 rtx label_insn = emit_label_after (lb, insn);
9410 LABEL_PRESERVE_P (lb) = 1;
9412 delete_insn (label_insn);
9414 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9416 return xstrdup (label);
9419 /* Define the CFA after INSN with the steady-state definition. */
9422 ia64_dwarf2out_def_steady_cfa (rtx insn)
9424 rtx fp = frame_pointer_needed
9425 ? hard_frame_pointer_rtx
9426 : stack_pointer_rtx;
9429 (ia64_emit_deleted_label_after_insn (insn),
9431 ia64_initial_elimination_offset
9432 (REGNO (arg_pointer_rtx), REGNO (fp))
9433 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9436 /* The generic dwarf2 frame debug info generator does not define a
9437 separate region for the very end of the epilogue, so refrain from
9438 doing so in the IA64-specific code as well. */
9440 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9442 /* The function emits unwind directives for the start of an epilogue. */
9445 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9447 /* If this isn't the last block of the function, then we need to label the
9448 current state, and copy it back in at the start of the next block. */
9453 fprintf (asm_out_file, "\t.label_state %d\n",
9454 ++cfun->machine->state_num);
9455 need_copy_state = true;
9459 fprintf (asm_out_file, "\t.restore sp\n");
9460 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9461 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9462 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9465 /* This function processes a SET pattern looking for specific patterns
9466 which result in emitting an assembly directive required for unwinding. */
9469 process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
9471 rtx src = SET_SRC (pat);
9472 rtx dest = SET_DEST (pat);
9473 int src_regno, dest_regno;
9475 /* Look for the ALLOC insn. */
9476 if (GET_CODE (src) == UNSPEC_VOLATILE
9477 && XINT (src, 1) == UNSPECV_ALLOC
9478 && GET_CODE (dest) == REG)
9480 dest_regno = REGNO (dest);
9482 /* If this is the final destination for ar.pfs, then this must
9483 be the alloc in the prologue. */
9484 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9487 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9488 ia64_dbx_register_number (dest_regno));
9492 /* This must be an alloc before a sibcall. We must drop the
9493 old frame info. The easiest way to drop the old frame
9494 info is to ensure we had a ".restore sp" directive
9495 followed by a new prologue. If the procedure doesn't
9496 have a memory-stack frame, we'll issue a dummy ".restore sp" now. */
9498 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
9499 /* If we haven't done process_epilogue () yet, do it now. */
9500 process_epilogue (asm_out_file, insn, unwind, frame);
9502 fprintf (asm_out_file, "\t.prologue\n");
9507 /* Look for SP = .... */
9508 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9510 if (GET_CODE (src) == PLUS)
9512 rtx op0 = XEXP (src, 0);
9513 rtx op1 = XEXP (src, 1);
9515 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9517 if (INTVAL (op1) < 0)
9519 gcc_assert (!frame_pointer_needed);
9521 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9524 ia64_dwarf2out_def_steady_cfa (insn);
9527 process_epilogue (asm_out_file, insn, unwind, frame);
9531 gcc_assert (GET_CODE (src) == REG
9532 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9533 process_epilogue (asm_out_file, insn, unwind, frame);
9539 /* Register move we need to look at. */
9540 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9542 src_regno = REGNO (src);
9543 dest_regno = REGNO (dest);
9548 /* Saving return address pointer. */
9549 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9551 fprintf (asm_out_file, "\t.save rp, r%d\n",
9552 ia64_dbx_register_number (dest_regno));
9556 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9558 fprintf (asm_out_file, "\t.save pr, r%d\n",
9559 ia64_dbx_register_number (dest_regno));
9562 case AR_UNAT_REGNUM:
9563 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9565 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9566 ia64_dbx_register_number (dest_regno));
9570 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9572 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9573 ia64_dbx_register_number (dest_regno));
9576 case STACK_POINTER_REGNUM:
9577 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9578 && frame_pointer_needed);
9580 fprintf (asm_out_file, "\t.vframe r%d\n",
9581 ia64_dbx_register_number (dest_regno));
9583 ia64_dwarf2out_def_steady_cfa (insn);
9587 /* Everything else should indicate being stored to memory. */
9592 /* Memory store we need to look at. */
9593 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
9599 if (GET_CODE (XEXP (dest, 0)) == REG)
9601 base = XEXP (dest, 0);
9606 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9607 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9608 base = XEXP (XEXP (dest, 0), 0);
9609 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9612 if (base == hard_frame_pointer_rtx)
9614 saveop = ".savepsp";
9619 gcc_assert (base == stack_pointer_rtx);
9623 src_regno = REGNO (src);
9627 gcc_assert (!current_frame_info.r[reg_save_b0]);
9629 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
9633 gcc_assert (!current_frame_info.r[reg_save_pr]);
9635 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
9639 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9641 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9645 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9647 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9650 case AR_UNAT_REGNUM:
9651 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9653 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9661 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9662 1 << (src_regno - GR_REG (4)));
9671 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9672 1 << (src_regno - BR_REG (1)));
9680 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9681 1 << (src_regno - FR_REG (2)));
9684 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9685 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9686 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9687 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9689 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9690 1 << (src_regno - FR_REG (12)));
9702 /* This function looks at a single insn and emits any directives
9703 required to unwind this insn. */
9705 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9707 bool unwind = (flag_unwind_tables
9708 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9709 bool frame = dwarf2out_do_frame ();
9711 if (unwind || frame)
9715 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9717 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9719 /* Restore unwind state from immediately before the epilogue. */
9720 if (need_copy_state)
9724 fprintf (asm_out_file, "\t.body\n");
9725 fprintf (asm_out_file, "\t.copy_state %d\n",
9726 cfun->machine->state_num);
9728 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9729 ia64_dwarf2out_def_steady_cfa (insn);
9730 need_copy_state = false;
9734 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9737 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9739 pat = XEXP (pat, 0);
9741 pat = PATTERN (insn);
9743 switch (GET_CODE (pat))
9746 process_set (asm_out_file, pat, insn, unwind, frame);
9752 int limit = XVECLEN (pat, 0);
9753 for (par_index = 0; par_index < limit; par_index++)
9755 rtx x = XVECEXP (pat, 0, par_index);
9756 if (GET_CODE (x) == SET)
9757 process_set (asm_out_file, x, insn, unwind, frame);
9772 IA64_BUILTIN_COPYSIGNQ,
9774 IA64_BUILTIN_FLUSHRS,
9776 IA64_BUILTIN_HUGE_VALQ
9780 ia64_init_builtins (void)
9785 /* The __fpreg type. */
9786 fpreg_type = make_node (REAL_TYPE);
9787 TYPE_PRECISION (fpreg_type) = 82;
9788 layout_type (fpreg_type);
9789 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9791 /* The __float80 type. */
9792 float80_type = make_node (REAL_TYPE);
9793 TYPE_PRECISION (float80_type) = 80;
9794 layout_type (float80_type);
9795 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9797 /* The __float128 type. */
9801 tree float128_type = make_node (REAL_TYPE);
9803 TYPE_PRECISION (float128_type) = 128;
9804 layout_type (float128_type);
9805 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9807 /* TFmode support builtins. */
9808 ftype = build_function_type (float128_type, void_list_node);
9809 add_builtin_function ("__builtin_infq", ftype,
9810 IA64_BUILTIN_INFQ, BUILT_IN_MD,
9813 add_builtin_function ("__builtin_huge_valq", ftype,
9814 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9817 ftype = build_function_type_list (float128_type,
9820 decl = add_builtin_function ("__builtin_fabsq", ftype,
9821 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9822 "__fabstf2", NULL_TREE);
9823 TREE_READONLY (decl) = 1;
9825 ftype = build_function_type_list (float128_type,
9829 decl = add_builtin_function ("__builtin_copysignq", ftype,
9830 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
9831 "__copysigntf3", NULL_TREE);
9832 TREE_READONLY (decl) = 1;
9835 /* Under HPUX, this is a synonym for "long double". */
9836 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9839 #define def_builtin(name, type, code) \
9840 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9843 def_builtin ("__builtin_ia64_bsp",
9844 build_function_type (ptr_type_node, void_list_node),
9847 def_builtin ("__builtin_ia64_flushrs",
9848 build_function_type (void_type_node, void_list_node),
9849 IA64_BUILTIN_FLUSHRS);
9855 if (built_in_decls [BUILT_IN_FINITE])
9856 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9858 if (built_in_decls [BUILT_IN_FINITEF])
9859 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9861 if (built_in_decls [BUILT_IN_FINITEL])
9862 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9868 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9869 enum machine_mode mode ATTRIBUTE_UNUSED,
9870 int ignore ATTRIBUTE_UNUSED)
9872 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9873 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9877 case IA64_BUILTIN_BSP:
9878 if (! target || ! register_operand (target, DImode))
9879 target = gen_reg_rtx (DImode);
9880 emit_insn (gen_bsp_value (target));
9881 #ifdef POINTERS_EXTEND_UNSIGNED
9882 target = convert_memory_address (ptr_mode, target);
9886 case IA64_BUILTIN_FLUSHRS:
9887 emit_insn (gen_flushrs ());
9890 case IA64_BUILTIN_INFQ:
9891 case IA64_BUILTIN_HUGE_VALQ:
9893 REAL_VALUE_TYPE inf;
9897 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
9899 tmp = validize_mem (force_const_mem (mode, tmp));
9902 target = gen_reg_rtx (mode);
9904 emit_move_insn (target, tmp);
9908 case IA64_BUILTIN_FABSQ:
9909 case IA64_BUILTIN_COPYSIGNQ:
9910 return expand_call (exp, target, ignore);
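/* Illustrative sketch (not compiled as part of this file): user-level
   calls to the machine builtins registered in ia64_init_builtins above.
   __builtin_ia64_bsp returns the current backing-store pointer and
   __builtin_ia64_flushrs flushes the register stack to memory; the
   surrounding function name is invented for the example.  */
#if 0
static void *
example_read_backing_store_pointer (void)
{
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}
#endif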
9919 /* For HP-UX IA64, aggregate parameters are passed in the
9920 most significant bits of the stack slot. */
9923 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
9925 /* Exception to normal case for structures/unions/etc. */
9927 if (type && AGGREGATE_TYPE_P (type)
9928 && int_size_in_bytes (type) < UNITS_PER_WORD)
9931 /* Fall back to the default. */
9932 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9935 /* Emit text to declare externally defined variables and functions, because
9936 the Intel assembler does not support undefined externals. */
9939 ia64_asm_output_external (FILE *file, tree decl, const char *name)
9941 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9942 set in order to avoid putting out names that are never really used. */
9944 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
9946 /* maybe_assemble_visibility will return 1 if the assembler
9947 visibility directive is output. */
9948 int need_visibility = ((*targetm.binds_local_p) (decl)
9949 && maybe_assemble_visibility (decl));
9951 /* GNU as does not need anything here, but the HP linker does
9952 need something for external functions. */
9953 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
9954 && TREE_CODE (decl) == FUNCTION_DECL)
9955 (*targetm.asm_out.globalize_decl_name) (file, decl);
9956 else if (need_visibility && !TARGET_GNU_AS)
9957 (*targetm.asm_out.globalize_label) (file, name);
9961 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
9962 modes of word_mode and larger. Rename the TFmode libfuncs using the
9963 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
9964 backward compatibility. */
9967 ia64_init_libfuncs (void)
9969 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9970 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9971 set_optab_libfunc (smod_optab, SImode, "__modsi3");
9972 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
9974 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9975 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9976 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9977 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9978 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9980 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9981 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9982 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9983 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9984 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9985 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9987 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9988 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
9989 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
9990 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9991 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9993 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9994 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
9995 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
9996 /* HP-UX 11.23 libc does not have a function for unsigned
9997 SImode-to-TFmode conversion. */
9998 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
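/* Illustrative sketch (not compiled as part of this file): with the
   tables above in effect, TFmode arithmetic in user code is lowered to
   the _U_Qf* library calls named here; the addition below is expected to
   become a call to _U_Qfadd.  The function name is invented and
   __float128 availability depends on the target configuration.  */
#if 0
__float128
example_quad_add (__float128 a, __float128 b)
{
  return a + b;
}
#endif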
10001 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10004 ia64_hpux_init_libfuncs (void)
10006 ia64_init_libfuncs ();
10008 /* The HP SI millicode division and mod functions expect DI arguments.
10009 By turning them off completely we avoid using both libgcc and the
10010 non-standard millicode routines and use the HP DI millicode routines instead. */
10013 set_optab_libfunc (sdiv_optab, SImode, 0);
10014 set_optab_libfunc (udiv_optab, SImode, 0);
10015 set_optab_libfunc (smod_optab, SImode, 0);
10016 set_optab_libfunc (umod_optab, SImode, 0);
10018 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10019 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10020 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10021 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10023 /* HP-UX libc has TF min/max/abs routines in it. */
10024 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10025 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10026 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10028 /* ia64_expand_compare uses this. */
10029 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10031 /* These should never be used. */
10032 set_optab_libfunc (eq_optab, TFmode, 0);
10033 set_optab_libfunc (ne_optab, TFmode, 0);
10034 set_optab_libfunc (gt_optab, TFmode, 0);
10035 set_optab_libfunc (ge_optab, TFmode, 0);
10036 set_optab_libfunc (lt_optab, TFmode, 0);
10037 set_optab_libfunc (le_optab, TFmode, 0);
10040 /* Rename the division and modulus functions in VMS. */
10043 ia64_vms_init_libfuncs (void)
10045 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10046 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10047 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10048 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10049 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10050 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10051 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10052 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10055 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10056 the HPUX conventions. */
10059 ia64_sysv4_init_libfuncs (void)
10061 ia64_init_libfuncs ();
10063 /* These functions are not part of the HPUX TFmode interface. We
10065 use them instead of _U_Qfcmp, which doesn't work the way we want. */
10066 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10067 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10068 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10069 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10070 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10071 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10073 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10074 glibc doesn't have them. */
10080 ia64_soft_fp_init_libfuncs (void)
10084 /* For HPUX, it is illegal to have relocations in shared segments. */
10087 ia64_hpux_reloc_rw_mask (void)
10092 /* For others, relax this so that relocations to local data go in
10093 read-only segments, but we still cannot allow global relocations
10094 in read-only segments. */
10097 ia64_reloc_rw_mask (void)
10099 return flag_pic ? 3 : 2;
10102 /* Return the section to use for X. The only special thing we do here
10103 is to honor small data. */
10106 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10107 unsigned HOST_WIDE_INT align)
10109 if (GET_MODE_SIZE (mode) > 0
10110 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10111 && !TARGET_NO_SDATA)
10112 return sdata_section;
10114 return default_elf_select_rtx_section (mode, x, align);
10117 static unsigned int
10118 ia64_section_type_flags (tree decl, const char *name, int reloc)
10120 unsigned int flags = 0;
10122 if (strcmp (name, ".sdata") == 0
10123 || strncmp (name, ".sdata.", 7) == 0
10124 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10125 || strncmp (name, ".sdata2.", 8) == 0
10126 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10127 || strcmp (name, ".sbss") == 0
10128 || strncmp (name, ".sbss.", 6) == 0
10129 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10130 flags = SECTION_SMALL;
10132 flags |= default_section_type_flags (decl, name, reloc);
10136 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10137 structure type and that the address of that type should be passed
10138 in out0, rather than in r8. */
10141 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10143 tree ret_type = TREE_TYPE (fntype);
10145 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10146 as the structure return address parameter, if the return value
10147 type has a non-trivial copy constructor or destructor. It is not
10148 clear if this same convention should be used for other
10149 programming languages. Until G++ 3.4, we incorrectly used r8 for
10150 these return values. */
10151 return (abi_version_at_least (2)
10153 && TYPE_MODE (ret_type) == BLKmode
10154 && TREE_ADDRESSABLE (ret_type)
10155 && strcmp (lang_hooks.name, "GNU C++") == 0);
10158 /* Output the assembler code for a thunk function. THUNK_DECL is the
10159 declaration for the thunk function itself, FUNCTION is the decl for
10160 the target function. DELTA is an immediate constant offset to be
10161 added to THIS. If VCALL_OFFSET is nonzero, the word at
10162 *(*this + vcall_offset) should be added to THIS. */
10165 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10166 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10169 rtx this_rtx, insn, funexp;
10170 unsigned int this_parmno;
10171 unsigned int this_regno;
10174 reload_completed = 1;
10175 epilogue_completed = 1;
10177 /* Set things up as ia64_expand_prologue might. */
10178 last_scratch_gr_reg = 15;
10180 memset (&current_frame_info, 0, sizeof (current_frame_info));
10181 current_frame_info.spill_cfa_off = -16;
10182 current_frame_info.n_input_regs = 1;
10183 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10185 /* Mark the end of the (empty) prologue. */
10186 emit_note (NOTE_INSN_PROLOGUE_END);
10188 /* Figure out whether "this" will be the first parameter (the
10189 typical case) or the second parameter (as happens when the
10190 virtual function returns certain class objects). */
10192 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10194 this_regno = IN_REG (this_parmno);
10195 if (!TARGET_REG_NAMES)
10196 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10198 this_rtx = gen_rtx_REG (Pmode, this_regno);
10200 /* Apply the constant offset, if required. */
10201 delta_rtx = GEN_INT (delta);
10204 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10205 REG_POINTER (tmp) = 1;
10206 if (delta && satisfies_constraint_I (delta_rtx))
10208 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10212 emit_insn (gen_ptr_extend (this_rtx, tmp));
10216 if (!satisfies_constraint_I (delta_rtx))
10218 rtx tmp = gen_rtx_REG (Pmode, 2);
10219 emit_move_insn (tmp, delta_rtx);
10222 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10225 /* Apply the offset from the vtable, if required. */
10228 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10229 rtx tmp = gen_rtx_REG (Pmode, 2);
10233 rtx t = gen_rtx_REG (ptr_mode, 2);
10234 REG_POINTER (t) = 1;
10235 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10236 if (satisfies_constraint_I (vcall_offset_rtx))
10238 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10242 emit_insn (gen_ptr_extend (tmp, t));
10245 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10249 if (!satisfies_constraint_J (vcall_offset_rtx))
10251 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10252 emit_move_insn (tmp2, vcall_offset_rtx);
10253 vcall_offset_rtx = tmp2;
10255 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10259 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10261 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10263 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10266 /* Generate a tail call to the target function. */
10267 if (! TREE_USED (function))
10269 assemble_external (function);
10270 TREE_USED (function) = 1;
10272 funexp = XEXP (DECL_RTL (function), 0);
10273 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10274 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10275 insn = get_last_insn ();
10276 SIBLING_CALL_P (insn) = 1;
10278 /* Code generation for calls relies on splitting. */
10279 reload_completed = 1;
10280 epilogue_completed = 1;
10281 try_split (PATTERN (insn), insn, 0);
10285 /* Run just enough of rest_of_compilation to get the insns emitted.
10286 There's not really enough bulk here to make other passes such as
10287 instruction scheduling worth while. Note that use_thunk calls
10288 assemble_start_function and assemble_end_function. */
10290 insn_locators_alloc ();
10291 emit_all_insn_group_barriers (NULL);
10292 insn = get_insns ();
10293 shorten_branches (insn);
10294 final_start_function (insn, file, 1);
10295 final (insn, file, 1);
10296 final_end_function ();
10297 free_after_compilation (cfun);
10299 reload_completed = 0;
10300 epilogue_completed = 0;
10303 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10306 ia64_struct_value_rtx (tree fntype,
10307 int incoming ATTRIBUTE_UNUSED)
10309 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
10311 return gen_rtx_REG (Pmode, GR_REG (8));
10315 ia64_scalar_mode_supported_p (enum machine_mode mode)
10341 ia64_vector_mode_supported_p (enum machine_mode mode)
10358 /* Implement the FUNCTION_PROFILER macro. */
10361 ia64_output_function_profiler (FILE *file, int labelno)
10363 bool indirect_call;
10365 /* If the function needs a static chain and the static chain
10366 register is r15, we use an indirect call so as to bypass
10367 the PLT stub in case the executable is dynamically linked,
10368 because the stub clobbers r15 as per 5.3.6 of the psABI.
10369 We don't need to do that in non-canonical PIC mode. */
10371 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10373 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10374 indirect_call = true;
10377 indirect_call = false;
10380 fputs ("\t.prologue 4, r40\n", file);
10382 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10383 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10385 if (NO_PROFILE_COUNTERS)
10386 fputs ("\tmov out3 = r0\n", file);
10390 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10392 if (TARGET_AUTO_PIC)
10393 fputs ("\tmovl out3 = @gprel(", file);
10395 fputs ("\taddl out3 = @ltoff(", file);
10396 assemble_name (file, buf);
10397 if (TARGET_AUTO_PIC)
10398 fputs (")\n", file);
10400 fputs ("), r1\n", file);
10404 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10405 fputs ("\t;;\n", file);
10407 fputs ("\t.save rp, r42\n", file);
10408 fputs ("\tmov out2 = b0\n", file);
10410 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10411 fputs ("\t.body\n", file);
10412 fputs ("\tmov out1 = r1\n", file);
10415 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10416 fputs ("\tmov b6 = r16\n", file);
10417 fputs ("\tld8 r1 = [r14]\n", file);
10418 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10421 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10424 static GTY(()) rtx mcount_func_rtx;
10426 gen_mcount_func_rtx (void)
10428 if (!mcount_func_rtx)
10429 mcount_func_rtx = init_one_libfunc ("_mcount");
10430 return mcount_func_rtx;
10434 ia64_profile_hook (int labelno)
10438 if (NO_PROFILE_COUNTERS)
10439 label = const0_rtx;
10443 const char *label_name;
10444 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10445 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10446 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10447 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10449 ip = gen_reg_rtx (Pmode);
10450 emit_insn (gen_ip_value (ip));
10451 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10453 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10458 /* Return the mangling of TYPE if it is an extended fundamental type. */
10460 static const char *
10461 ia64_mangle_type (const_tree type)
10463 type = TYPE_MAIN_VARIANT (type);
10465 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10466 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10469 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10471 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10473 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10474 an extended mangling. Elsewhere, "e" is available since long
10475 double is 80 bits. */
10476 if (TYPE_MODE (type) == XFmode)
10477 return TARGET_HPUX ? "u9__float80" : "e";
10478 if (TYPE_MODE (type) == RFmode)
10479 return "u7__fpreg";
10483 /* Return the diagnostic message string if conversion from FROMTYPE to
10484 TOTYPE is not allowed, NULL otherwise. */
10485 static const char *
10486 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10488 /* Reject nontrivial conversion to or from __fpreg. */
10489 if (TYPE_MODE (fromtype) == RFmode
10490 && TYPE_MODE (totype) != RFmode
10491 && TYPE_MODE (totype) != VOIDmode)
10492 return N_("invalid conversion from %<__fpreg%>");
10493 if (TYPE_MODE (totype) == RFmode
10494 && TYPE_MODE (fromtype) != RFmode)
10495 return N_("invalid conversion to %<__fpreg%>");
10499 /* Return the diagnostic message string if the unary operation OP is
10500 not permitted on TYPE, NULL otherwise. */
10501 static const char *
10502 ia64_invalid_unary_op (int op, const_tree type)
10504 /* Reject operations on __fpreg other than unary + or &. */
10505 if (TYPE_MODE (type) == RFmode
10506 && op != CONVERT_EXPR
10507 && op != ADDR_EXPR)
10508 return N_("invalid operation on %<__fpreg%>");
10512 /* Return the diagnostic message string if the binary operation OP is
10513 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10514 static const char *
10515 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10517 /* Reject operations on __fpreg. */
10518 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10519 return N_("invalid operation on %<__fpreg%>");
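/* Illustrative sketch (not compiled as part of this file): what the
   conversion and operation diagnostics above allow and reject for the
   __fpreg type.  Variable names are invented.  */
#if 0
__fpreg example_f;
__fpreg *example_p = &example_f;        /* unary & is allowed */

/* The following would be rejected with "invalid operation on __fpreg":

     double d = example_f + 1.0;

   as would a nontrivial conversion such as (double) example_f.  */
#endif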
10523 /* Implement overriding of the optimization options. */
10525 ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10526 int size ATTRIBUTE_UNUSED)
10528 /* Let the scheduler form additional regions. */
10529 set_param_value ("max-sched-extend-regions-iters", 2);
10531 /* Set the default values for cache-related parameters. */
10532 set_param_value ("simultaneous-prefetches", 6);
10533 set_param_value ("l1-cache-line-size", 32);
10535 set_param_value("sched-mem-true-dep-cost", 4);
10538 /* HP-UX version_id attribute.
10539 For object foo, if the version_id is set to 1234, put out an alias
10540 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
10541 other than an alias statement because it is not a legal symbol name. */
10544 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10545 tree name ATTRIBUTE_UNUSED,
10547 int flags ATTRIBUTE_UNUSED,
10548 bool *no_add_attrs)
10550 tree arg = TREE_VALUE (args);
10552 if (TREE_CODE (arg) != STRING_CST)
10554 error("version attribute is not a string");
10555 *no_add_attrs = true;
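/* Illustrative sketch (not compiled as part of this file): user-level use
   of the HP-UX version_id attribute handled above.  Per the comment, the
   declaration below is expected to produce an '.alias foo "foo{1234}"'
   directive; the function signature is invented.  */
#if 0
extern int foo (int) __attribute__ ((version_id ("1234")));
#endif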
10561 /* Target hook for c_mode_for_suffix. */
10563 static enum machine_mode
10564 ia64_c_mode_for_suffix (char suffix)
10574 #include "gt-ia64.h"