/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
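/* The value mirrors the -mtls-size={14,22,64} option: a 14-bit offset fits
   the immediate field of "adds", a 22-bit offset fits "addl", and anything
   larger has to be materialized with "movl".  */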
/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
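/* The initializer makes Itanium 2 tuning the default; -mtune= (presumably
   handled in ia64_handle_option) overrides it.  */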
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorg.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
				     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,	       0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
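/* data1/data2/data4/data8 are the IA-64 assembler's equivalents of the
   usual .byte/.short/.int/.quad pseudo-ops; the ".ua" completer marks the
   operand as possibly unaligned.  */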
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
  return ADDR_AREA_NORMAL;
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  tree arg, decl = *node;

  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
      addr_area = ADDR_AREA_SMALL;
      warning (0, "invalid argument of %qs attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;

  switch (TREE_CODE (decl))
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	  && !TREE_STATIC (decl))
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl, decl);
	  *no_add_attrs = true;
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	  error ("%Jaddress area of '%s' conflicts with previous "
		 "declaration", decl, decl);
	  *no_add_attrs = true;
      error ("%Jaddress area attribute cannot be specified for functions",
      *no_add_attrs = true;
      warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;

ia64_encode_addr_area (tree decl, rtx symbol)
  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
  SYMBOL_REF_FLAGS (symbol) = flags;

ia64_encode_section_info (tree decl, rtx rtl, int first)
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));

/* Implement CONST_OK_FOR_LETTER_P.  */

ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
      return CONST_OK_FOR_I (value);
      return CONST_OK_FOR_J (value);
      return CONST_OK_FOR_K (value);
      return CONST_OK_FOR_L (value);
      return CONST_OK_FOR_M (value);
      return CONST_OK_FOR_N (value);
      return CONST_OK_FOR_O (value);
      return CONST_OK_FOR_P (value);

/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */

ia64_const_double_ok_for_letter_p (rtx value, char c)
      return CONST_DOUBLE_OK_FOR_G (value);

/* Implement EXTRA_CONSTRAINT.  */

ia64_extra_constraint (rtx value, char c)
      /* Non-volatile memory for FP_REG loads/stores.  */
      return memory_operand (value, VOIDmode) && !MEM_VOLATILE_P (value);

      /* 1..4 for shladd arguments.  */
      return (GET_CODE (value) == CONST_INT
	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);
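      /* shladd computes r1 = (r2 << count) + r3, and the shift count field
	 only encodes values 1 through 4, hence the range check above.  */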
      /* Non-post-inc memory for asms and other unsavory creatures.  */
      return (GET_CODE (value) == MEM
	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
	      && (reload_in_progress || memory_operand (value, VOIDmode)));

      /* Symbol ref to small-address-area.  */
      return (GET_CODE (value) == SYMBOL_REF
	      && SYMBOL_REF_SMALL_ADDR_P (value));

      return value == CONST0_RTX (GET_MODE (value));

      /* An integer vector, such that conversion to an integer yields a
	 value appropriate for an integer 'J' constraint.  */
      if (GET_CODE (value) == CONST_VECTOR
	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');

      /* A V2SF vector containing elements that satisfy 'G'.  */
	(GET_CODE (value) == CONST_VECTOR
	 && GET_MODE (value) == V2SFmode
	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));

/* Return 1 if the operands of a move are ok.  */

ia64_move_ok (rtx dst, rtx src)
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
  if (GET_CODE (src) == MEM)
  if (register_operand (src, VOIDmode))

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);

addp4_optimize_ok (rtx op1, rtx op2)
  return (basereg_operand (op1, GET_MODE (op1)) !=
	  basereg_operand (op2, GET_MODE (op2)));
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

ia64_depz_field_mask (rtx rop, rtx rshift)
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
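  /* Worked example: for a mask of 0xff00 used with a shift of 8, discarding
     the shifted-in zeros leaves 0xff, and exact_log2 (0x100) yields a field
     length of 8, matching "dep.z r1 = r2, 8, 8".  */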
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
  enum tls_model tls_kind = 0;

  if (GET_CODE (addr) == CONST)
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

ia64_legitimate_constant_p (rtx x)
  switch (GET_CODE (x))
      if (GET_MODE (x) == VOIDmode)
      return CONST_DOUBLE_OK_FOR_G (x);
      return tls_symbolic_operand_type (x) == 0;

/* Don't allow TLS addresses to get spilled to memory.  */

ia64_cannot_force_const_mem (rtx x)
  return tls_symbolic_operand_type (x) != 0;

/* Expand a symbolic constant load.  */

ia64_expand_load_address (rtx dest, rtx src)
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);

  if (small_addr_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
      HOST_WIDE_INT addend = 0;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
      if (GET_CODE (src) == CONST)
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  src = plus_constant (XEXP (XEXP (src, 0), 0), hi);

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

static GTY(()) rtx gen_tls_tga;

gen_tls_get_addr (void)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");

static GTY(()) rtx thread_pointer_rtx;

gen_thread_pointer (void)
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
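  /* Register r13 is the thread pointer ("tp") in the IA-64 software
     conventions, so all TLS offsets are applied relative to it.  */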
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 HOST_WIDE_INT addend)
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0, orig_op1 = op1;
  HOST_WIDE_INT addend_lo, addend_hi;

  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
  addend_hi = addend - addend_lo;
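  /* The xor/subtract idiom sign-extends the low 14 bits of ADDEND, so
     ADDEND_LO fits the signed 14-bit "adds" immediate while ADDEND_HI is
     the remaining multiple of 0x4000.  For example, an addend of 0x12345
     splits into addend_lo = -0x1cbb and addend_hi = 0x14000.  */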
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();

      if (GET_MODE (op0) != Pmode)
      emit_libcall_block (insns, op0, tga_ret, op1);

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));
      tga_op1 = gen_const_mem (Pmode, tga_op1);

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	emit_insn (gen_add_dtprel (op0, op1, tmp));

    case TLS_MODEL_INITIAL_EXEC:
      op1 = plus_constant (op1, addend_hi);

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));

    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);

  if (GET_MODE (orig_op0) == Pmode)
  return gen_lowpart (GET_MODE (orig_op0), op0);

ia64_expand_move (rtx op0, rtx op1)
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);

      tls_kind = tls_symbolic_operand_type (sym);
	return ia64_expand_tls_address (tls_kind, op0, sym, addend);

      if (any_offset_symbol_operand (sym, mode))
      else if (aligned_offset_symbol_operand (sym, mode))
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;
	      op1 = plus_constant (sym, addend_hi);

      if (reload_completed)
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))

	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);

/* Split a move from OP1 to OP0 conditional on COND.  */

ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
  switch (GET_CODE (in))
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);

      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);

	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);

	    /* Reversal requires a pre-increment, which can only
	       be done as a separate insn.  */
	    emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
	    out[1] = adjust_address (in, DImode, 0);

	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);

	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),

	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));

		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));

		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

ia64_split_tmode_move (rtx operands[])
  rtx in[2], out[2], insn;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))

  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

    emit_insn (fixup[0]);
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

spill_xfmode_operand (rtx in, int force)
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
  else if (force && GET_CODE (in) == REG)
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
	  QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
      enum rtx_code ncode;

      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: gcc_unreachable ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,

      insns = get_insns ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));

      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);

/* Generate an integral vector comparison.  */

ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
  bool negate = false;
	rtx w0h, w0l, w1h, w1l, ch, cl;
	enum machine_mode wmode;
	rtx (*unpack_l) (rtx, rtx, rtx);
	rtx (*unpack_h) (rtx, rtx, rtx);
	rtx (*pack) (rtx, rtx, rtx);

	/* We don't have native unsigned comparisons, but we can generate
	   them better than generic code can.  */
	gcc_assert (mode != V2SImode);
	    pack = gen_pack2_sss;
	    unpack_l = gen_unpack1_l;
	    unpack_h = gen_unpack1_h;
	    pack = gen_pack4_sss;
	    unpack_l = gen_unpack2_l;
	    unpack_h = gen_unpack2_h;

	/* Unpack into wider vectors, zero extending the elements.  */
	w0l = gen_reg_rtx (wmode);
	w0h = gen_reg_rtx (wmode);
	w1l = gen_reg_rtx (wmode);
	w1h = gen_reg_rtx (wmode);
	emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
	emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
	emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
	emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));

	/* Compare in the wider mode.  */
	cl = gen_reg_rtx (wmode);
	ch = gen_reg_rtx (wmode);
	code = signed_condition (code);
	ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
	negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);

	/* Repack into a single narrower vector.  */
	emit_insn (pack (dest, cl, ch));

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
  rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;

  /* In this case, we extract the two SImode quantities and generate
     normal comparisons for each of them.  */
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  op4l = gen_lowpart (SImode, operands[4]);
  op5l = gen_lowpart (SImode, operands[5]);

  op1h = gen_reg_rtx (SImode);
  op2h = gen_reg_rtx (SImode);
  op4h = gen_reg_rtx (SImode);
  op5h = gen_reg_rtx (SImode);

  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
			  gen_lowpart (DImode, operands[1]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
			  gen_lowpart (DImode, operands[2]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
			  gen_lowpart (DImode, operands[4]), GEN_INT (32)));
  emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
			  gen_lowpart (DImode, operands[5]), GEN_INT (32)));

  bl = gen_reg_rtx (BImode);
  x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
  emit_insn (gen_rtx_SET (VOIDmode, bl, x));

  bh = gen_reg_rtx (BImode);
  x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
  emit_insn (gen_rtx_SET (VOIDmode, bh, x));

  /* With the results of the comparisons, emit conditional moves.  */
  dl = gen_reg_rtx (SImode);
  x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
  emit_insn (gen_rtx_SET (VOIDmode, dl, x));

  dh = gen_reg_rtx (SImode);
  x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
  emit_insn (gen_rtx_SET (VOIDmode, dh, x));

  /* Merge the two partial results back into a vector.  */
  x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));

/* Emit an integral vector conditional move.  */

ia64_expand_vecint_cmov (rtx operands[])
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);

  /* Since we don't have unsigned V2SImode comparisons, it's more efficient
     to special-case them entirely.  */
  if (mode == V2SImode
      && (code == GTU || code == GEU || code == LEU || code == LTU))
      ia64_expand_vcondu_v2si (code, operands);

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
      if (of == CONST0_RTX (mode))
	  emit_move_insn (operands[0], ot);

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
  else if (of == CONST0_RTX (mode))
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));

/* Emit an integral vector min or max operation.  Return true if all done.  */

ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
  if (mode == V4HImode && (code == SMIN || code == SMAX))

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);

/* Emit the appropriate sequence for a call.  */

ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
    addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	insn = gen_sibcall_nogp (addr);
	insn = gen_call_nogp (addr, b0);
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
	insn = gen_sibcall_gp (addr);
	insn = gen_call_gp (addr, b0);
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);

    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

ia64_reload_gp (void)
  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	  tmp = hard_frame_pointer_rtx;
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);

  emit_move_insn (pic_offset_table_rtx, tmp);

ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
  bool is_desc = false;
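  /* In the IA-64 software conventions a function pointer refers to a
     function descriptor: a pair of doublewords holding the entry address
     and the callee's gp value, which is why an indirect call below loads
     both words.  */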
  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      insn = gen_sibcall_nogp (addr);
    insn = gen_call_value_nogp (retval, addr, retaddr);
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)

/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		       rtx old_dst, rtx new_dst)
  enum machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode))
	old_dst = gen_reg_rtx (mode);

      emit_insn (gen_memory_barrier ());

	icode = CODE_FOR_fetchadd_acq_si;
	icode = CODE_FOR_fetchadd_acq_di;
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
	  if (new_reg != new_dst)
	    emit_move_insn (new_dst, new_reg);

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.  */
  gcc_assert (MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    emit_move_insn (cmp_reg, mem);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

    new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);

  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
				 true, OPTAB_DIRECT);

    new_reg = gen_lowpart (mode, new_reg);
    emit_move_insn (new_dst, new_reg);

    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, EQ, NULL, DImode, true, label);

/* Begin the assembly file.  */

ia64_file_start (void)
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     ia64_override_options, because flag_var_tracking is finalized after
  ia64_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
  emit_safe_across_calls ();

emit_safe_across_calls (void)
  unsigned int rs, re;

      while (rs < 64 && call_used_regs[PR_REG (rs)])
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	fputc (',', asm_out_file);
	fprintf (asm_out_file, "p%u", rs);
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);

  fputc ('\n', asm_out_file);
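  /* The directive names the predicate registers that survive calls.  With
     the usual IA-64 call-used set the output would look roughly like

	.pred.safe_across_calls p1-p5,p16-p63

     (illustrative; the exact ranges depend on the configured
     call_used_regs).  */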
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

find_gr_spill (int try_locals)
  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	    current_frame_info.gr_used_mask |= 1 << regno;

      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;

  /* Failed to find a general register to spill to.  Must use stack.  */

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

next_scratch_gr_reg (void)
  for (i = 0; i < 32; ++i)
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  last_scratch_gr_reg = regno;

  /* There must be _something_ available.  */

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
  unsigned int regno = REGNO (reg);
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

ia64_compute_frame_size (HOST_WIDE_INT size)
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;

  if (current_frame_info.initialized)
  memset (&current_frame_info, 0, sizeof current_frame_info);
2044 CLEAR_HARD_REG_SET (mask);
2046 /* Don't allocate scratches to the return register. */
2047 diddle_return_value (mark_reg_gr_used_mask, NULL);
2049 /* Don't allocate scratches to the EH scratch registers. */
2050 if (cfun->machine->ia64_eh_epilogue_sp)
2051 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2052 if (cfun->machine->ia64_eh_epilogue_bsp)
2053 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2055 /* Find the size of the register stack frame. We have only 80 local
2056 registers, because we reserve 8 for the inputs and 8 for the
2059 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2060 since we'll be adjusting that down later. */
2061 regno = LOC_REG (78) + ! frame_pointer_needed;
2062 for (; regno >= LOC_REG (0); regno--)
2063 if (regs_ever_live[regno])
2065 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2067 /* For functions marked with the syscall_linkage attribute, we must mark
2068 all eight input registers as in use, so that locals aren't visible to
2071 if (cfun->machine->n_varargs > 0
2072 || lookup_attribute ("syscall_linkage",
2073 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2074 current_frame_info.n_input_regs = 8;
2077 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2078 if (regs_ever_live[regno])
2080 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2083 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2084 if (regs_ever_live[regno])
2086 i = regno - OUT_REG (0) + 1;
2088 /* When -p profiling, we need one output register for the mcount argument.
2089 Likewise for -a profiling for the bb_init_func argument. For -ax
2090 profiling, we need two output registers for the two bb_init_trace_func
2092 if (current_function_profile)
2094 current_frame_info.n_output_regs = i;
2096 /* ??? No rotating register support yet. */
2097 current_frame_info.n_rotate_regs = 0;
2099 /* Discover which registers need spilling, and how much room that
2100 will take. Begin with floating point and general registers,
2101 which will always wind up on the stack. */
2103 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2104 if (regs_ever_live[regno] && ! call_used_regs[regno])
2106 SET_HARD_REG_BIT (mask, regno);
2112 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2113 if (regs_ever_live[regno] && ! call_used_regs[regno])
2115 SET_HARD_REG_BIT (mask, regno);
2121 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2122 if (regs_ever_live[regno] && ! call_used_regs[regno])
2124 SET_HARD_REG_BIT (mask, regno);
2129 /* Now come all special registers that might get saved in other
2130 general registers. */
2132 if (frame_pointer_needed)
2134 current_frame_info.reg_fp = find_gr_spill (1);
2135 /* If we did not get a register, then we take LOC79. This is guaranteed
2136 to be free, even if regs_ever_live is already set, because this is
2137 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2138 as we don't count loc79 above. */
2139 if (current_frame_info.reg_fp == 0)
2141 current_frame_info.reg_fp = LOC_REG (79);
2142 current_frame_info.n_local_regs++;
2146 if (! current_function_is_leaf)
2148 /* Emit a save of BR0 if we call other functions. Do this even
2149 if this function doesn't return, as EH depends on this to be
2150 able to unwind the stack. */
2151 SET_HARD_REG_BIT (mask, BR_REG (0));
2153 current_frame_info.reg_save_b0 = find_gr_spill (1);
2154 if (current_frame_info.reg_save_b0 == 0)
2160 /* Similarly for ar.pfs. */
2161 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2162 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2163 if (current_frame_info.reg_save_ar_pfs == 0)
2165 extra_spill_size += 8;
2169 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2170 registers are clobbered, so we fall back to the stack. */
2171 current_frame_info.reg_save_gp
2172 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2173 if (current_frame_info.reg_save_gp == 0)
2175 SET_HARD_REG_BIT (mask, GR_REG (1));
2182 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2184 SET_HARD_REG_BIT (mask, BR_REG (0));
2189 if (regs_ever_live[AR_PFS_REGNUM])
2191 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2192 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2193 if (current_frame_info.reg_save_ar_pfs == 0)
2195 extra_spill_size += 8;
2201 /* Unwind descriptor hackery: things are most efficient if we allocate
2202 consecutive GR save registers for RP, PFS, FP in that order. However,
2203 it is absolutely critical that FP get the only hard register that's
2204 guaranteed to be free, so we allocated it first. If all three did
2205 happen to be allocated hard regs, and are consecutive, rearrange them
2206 into the preferred order now. */
2207 if (current_frame_info.reg_fp != 0
2208 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2209 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2211 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2212 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2213 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
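/* A standalone sketch of the rotation above, with hypothetical register
   numbers: if FP was handed r35 and the B0 and AR.PFS saves landed in r36
   and r37, the swap leaves B0 in r35, AR.PFS in r36, and FP in r37 -- the
   RP, PFS, FP order the unwind descriptors prefer.  */
#if 0
static void
reorder_save_regs_sketch (int *fp, int *b0, int *pfs)
{
  if (*fp != 0 && *b0 == *fp + 1 && *pfs == *fp + 2)
    {
      *b0 = *fp;        /* e.g. r36 -> r35 */
      *pfs = *fp + 1;   /* e.g. r37 -> r36 */
      *fp = *fp + 2;    /* e.g. r35 -> r37 */
    }
}
#endif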
2216 /* See if we need to store the predicate register block. */
2217 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2218 if (regs_ever_live[regno] && ! call_used_regs[regno])
2220 if (regno <= PR_REG (63))
2222 SET_HARD_REG_BIT (mask, PR_REG (0));
2223 current_frame_info.reg_save_pr = find_gr_spill (1);
2224 if (current_frame_info.reg_save_pr == 0)
2226 extra_spill_size += 8;
2230 /* ??? Mark them all as used so that register renaming and such
2231 are free to use them. */
2232 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2233 regs_ever_live[regno] = 1;
2236 /* If we're forced to use st8.spill, we're forced to save and restore
2237 ar.unat as well. The check for existing liveness allows inline asm
2238 to touch ar.unat. */
2239 if (spilled_gr_p || cfun->machine->n_varargs
2240 || regs_ever_live[AR_UNAT_REGNUM])
2242 regs_ever_live[AR_UNAT_REGNUM] = 1;
2243 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2244 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2245 if (current_frame_info.reg_save_ar_unat == 0)
2247 extra_spill_size += 8;
2252 if (regs_ever_live[AR_LC_REGNUM])
2254 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2255 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2256 if (current_frame_info.reg_save_ar_lc == 0)
2258 extra_spill_size += 8;
2263 /* If we have an odd number of words of pretend arguments written to
2264 the stack, then the FR save area will be unaligned. We round the
2265 size of this area up to keep things 16 byte aligned. */
2267 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2269 pretend_args_size = current_function_pretend_args_size;
2271 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2272 + current_function_outgoing_args_size);
2273 total_size = IA64_STACK_ALIGN (total_size);
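/* A minimal standalone sketch of the rounding just applied, assuming
   IA64_STACK_ALIGN rounds its argument up to the next multiple of 16 (the
   helper below is hypothetical, not the GCC macro): 4 -> 16, 40 -> 48,
   48 -> 48.  */
#if 0
static long
stack_align_16_sketch (long bytes)
{
  return (bytes + 15) & ~15L;
}
#endif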
2275 /* We always use the 16-byte scratch area provided by the caller, but
2276 if we are a leaf function, there's no one to whom we need to provide a scratch area. */
2278 if (current_function_is_leaf)
2279 total_size = MAX (0, total_size - 16);
2281 current_frame_info.total_size = total_size;
2282 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2283 current_frame_info.spill_size = spill_size;
2284 current_frame_info.extra_spill_size = extra_spill_size;
2285 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2286 current_frame_info.n_spilled = n_spilled;
2287 current_frame_info.initialized = reload_completed;
2290 /* Compute the initial difference between the specified pair of registers. */
2293 ia64_initial_elimination_offset (int from, int to)
2295 HOST_WIDE_INT offset;
2297 ia64_compute_frame_size (get_frame_size ());
2300 case FRAME_POINTER_REGNUM:
2303 case HARD_FRAME_POINTER_REGNUM:
2304 if (current_function_is_leaf)
2305 offset = -current_frame_info.total_size;
2307 offset = -(current_frame_info.total_size
2308 - current_function_outgoing_args_size - 16);
2311 case STACK_POINTER_REGNUM:
2312 if (current_function_is_leaf)
2315 offset = 16 + current_function_outgoing_args_size;
2323 case ARG_POINTER_REGNUM:
2324 /* Arguments start above the 16 byte save area, unless stdarg
2325 in which case we store through the 16 byte save area. */
2328 case HARD_FRAME_POINTER_REGNUM:
2329 offset = 16 - current_function_pretend_args_size;
2332 case STACK_POINTER_REGNUM:
2333 offset = (current_frame_info.total_size
2334 + 16 - current_function_pretend_args_size);
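/* A worked instance of the visible cases above for a hypothetical non-leaf
   frame with total_size = 128, outgoing_args_size = 32 and
   pretend_args_size = 0; the values follow directly from the expressions in
   this function.  */
#if 0
static void
elimination_offsets_sketch (void)
{
  long total_size = 128, outgoing = 32, pretend = 0;
  long frame_to_hfp = -(total_size - outgoing - 16);   /* -80 */
  long frame_to_sp  = 16 + outgoing;                   /*  48 */
  long arg_to_hfp   = 16 - pretend;                    /*  16 */
  long arg_to_sp    = total_size + 16 - pretend;       /* 144 */
  (void) frame_to_hfp; (void) frame_to_sp;
  (void) arg_to_hfp; (void) arg_to_sp;
}
#endif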
2349 /* If there are more than a trivial number of register spills, we use
2350 two interleaved iterators so that we can get two memory references per insn group.
2353 In order to simplify things in the prologue and epilogue expanders,
2354 we use helper functions to fix up the memory references after the
2355 fact with the appropriate offsets to a POST_MODIFY memory mode.
2356 The following data structure tracks the state of the two iterators
2357 while insns are being emitted. */
2359 struct spill_fill_data
2361 rtx init_after; /* point at which to emit initializations */
2362 rtx init_reg[2]; /* initial base register */
2363 rtx iter_reg[2]; /* the iterator registers */
2364 rtx *prev_addr[2]; /* address of last memory use */
2365 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2366 HOST_WIDE_INT prev_off[2]; /* last offset */
2367 int n_iter; /* number of iterators in use */
2368 int next_iter; /* next iterator to use */
2369 unsigned int save_gr_used_mask;
2372 static struct spill_fill_data spill_fill_data;
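/* One way to picture the interleaving described above (all numbers
   illustrative): two address registers walk the save area in parallel, each
   covering every other slot, so back-to-back spills go through different
   base registers and can issue in the same instruction group.  */
#if 0
static void
interleaved_iterators_sketch (void)
{
  long base[2] = { 0, 8 };        /* two iterators, one slot apart     */
  int slot;
  for (slot = 0; slot < 6; slot++)
    {
      int it = slot & 1;          /* round-robin between the iterators */
      long addr = base[it];       /* address used for this spill       */
      base[it] += 16;             /* post-modify past the other's slot */
      (void) addr;
    }
}
#endif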
2375 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2379 spill_fill_data.init_after = get_last_insn ();
2380 spill_fill_data.init_reg[0] = init_reg;
2381 spill_fill_data.init_reg[1] = init_reg;
2382 spill_fill_data.prev_addr[0] = NULL;
2383 spill_fill_data.prev_addr[1] = NULL;
2384 spill_fill_data.prev_insn[0] = NULL;
2385 spill_fill_data.prev_insn[1] = NULL;
2386 spill_fill_data.prev_off[0] = cfa_off;
2387 spill_fill_data.prev_off[1] = cfa_off;
2388 spill_fill_data.next_iter = 0;
2389 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2391 spill_fill_data.n_iter = 1 + (n_spills > 2);
2392 for (i = 0; i < spill_fill_data.n_iter; ++i)
2394 int regno = next_scratch_gr_reg ();
2395 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2396 current_frame_info.gr_used_mask |= 1 << regno;
2401 finish_spill_pointers (void)
2403 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2407 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2409 int iter = spill_fill_data.next_iter;
2410 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2411 rtx disp_rtx = GEN_INT (disp);
2414 if (spill_fill_data.prev_addr[iter])
2416 if (CONST_OK_FOR_N (disp))
2418 *spill_fill_data.prev_addr[iter]
2419 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2420 gen_rtx_PLUS (DImode,
2421 spill_fill_data.iter_reg[iter],
2423 REG_NOTES (spill_fill_data.prev_insn[iter])
2424 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2425 REG_NOTES (spill_fill_data.prev_insn[iter]));
2429 /* ??? Could use register post_modify for loads. */
2430 if (! CONST_OK_FOR_I (disp))
2432 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2433 emit_move_insn (tmp, disp_rtx);
2436 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2437 spill_fill_data.iter_reg[iter], disp_rtx));
2440 /* Micro-optimization: if we've created a frame pointer, it's at
2441 CFA 0, which may allow the real iterator to be initialized lower,
2442 slightly increasing parallelism. Also, if there are few saves
2443 it may eliminate the iterator entirely. */
2445 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2446 && frame_pointer_needed)
2448 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2449 set_mem_alias_set (mem, get_varargs_alias_set ());
2457 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2458 spill_fill_data.init_reg[iter]);
2463 if (! CONST_OK_FOR_I (disp))
2465 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2466 emit_move_insn (tmp, disp_rtx);
2470 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2471 spill_fill_data.init_reg[iter],
2478 /* Careful for being the first insn in a sequence. */
2479 if (spill_fill_data.init_after)
2480 insn = emit_insn_after (seq, spill_fill_data.init_after);
2483 rtx first = get_insns ();
2485 insn = emit_insn_before (seq, first);
2487 insn = emit_insn (seq);
2489 spill_fill_data.init_after = insn;
2491 /* If DISP is 0, we may or may not have a further adjustment
2492 afterward. If we do, then the load/store insn may be modified
2493 to be a post-modify. If we don't, then this copy may be
2494 eliminated by copyprop_hardreg_forward, which makes this
2495 insn garbage, which runs afoul of the sanity check in
2496 propagate_one_insn. So mark this insn as legal to delete. */
2498 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2502 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2504 /* ??? Not all of the spills are for varargs, but some of them are.
2505 The rest of the spills belong in an alias set of their own. But
2506 it doesn't actually hurt to include them here. */
2507 set_mem_alias_set (mem, get_varargs_alias_set ());
2509 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2510 spill_fill_data.prev_off[iter] = cfa_off;
2512 if (++iter >= spill_fill_data.n_iter)
2514 spill_fill_data.next_iter = iter;
2520 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2523 int iter = spill_fill_data.next_iter;
2526 mem = spill_restore_mem (reg, cfa_off);
2527 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2528 spill_fill_data.prev_insn[iter] = insn;
2535 RTX_FRAME_RELATED_P (insn) = 1;
2537 /* Don't even pretend that the unwind code can intuit its way
2538 through a pair of interleaved post_modify iterators. Just
2539 provide the correct answer. */
2541 if (frame_pointer_needed)
2543 base = hard_frame_pointer_rtx;
2548 base = stack_pointer_rtx;
2549 off = current_frame_info.total_size - cfa_off;
2553 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2554 gen_rtx_SET (VOIDmode,
2555 gen_rtx_MEM (GET_MODE (reg),
2556 plus_constant (base, off)),
2563 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2565 int iter = spill_fill_data.next_iter;
2568 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2569 GEN_INT (cfa_off)));
2570 spill_fill_data.prev_insn[iter] = insn;
2573 /* Wrapper functions that discard the CONST_INT spill offset. These
2574 exist so that we can give gr_spill/gr_fill the offset they need and
2575 use a consistent function interface. */
2578 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2580 return gen_movdi (dest, src);
2584 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2586 return gen_fr_spill (dest, src);
2590 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2592 return gen_fr_restore (dest, src);
2595 /* Called after register allocation to add any instructions needed for the
2596 prologue. Using a prologue insn is favored compared to putting all of the
2597 instructions in output_function_prologue(), since it allows the scheduler
2598 to intermix instructions with the saves of the caller saved registers. In
2599 some cases, it might be necessary to emit a barrier instruction as the last
2600 insn to prevent such scheduling.
2602 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2603 so that the debug info generation code can handle them properly.
2605 The register save area is laid out like so:
2607 [ varargs spill area ]
2608 [ fr register spill area ]
2609 [ br register spill area ]
2610 [ ar register spill area ]
2611 [ pr register spill area ]
2612 [ gr register spill area ] */
2614 /* ??? Get inefficient code when the frame size is larger than can fit in an
2615 adds instruction. */
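/* A minimal sketch of the limitation noted above, assuming the usual IA-64
   immediate ranges: "adds" takes a 14-bit signed immediate, so a stack
   adjustment outside [-8192, 8191] cannot be done in a single adds and falls
   back to addl or to loading the displacement into a scratch register
   (hypothetical helper):  */
#if 0
static int
fits_adds_imm14_sketch (long disp)
{
  return disp >= -8192 && disp <= 8191;
}
#endif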
2618 ia64_expand_prologue (void)
2620 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2621 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2624 ia64_compute_frame_size (get_frame_size ());
2625 last_scratch_gr_reg = 15;
2627 /* If there is no epilogue, then we don't need some prologue insns.
2628 We need to avoid emitting the dead prologue insns, because flow
2629 will complain about them. */
2635 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2636 if ((e->flags & EDGE_FAKE) == 0
2637 && (e->flags & EDGE_FALLTHRU) != 0)
2639 epilogue_p = (e != NULL);
2644 /* Set the local, input, and output register names. We need to do this
2645 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2646 half. If we use in/loc/out register names, then we get assembler errors
2647 in crtn.S because there is no alloc insn or regstk directive in there. */
2648 if (! TARGET_REG_NAMES)
2650 int inputs = current_frame_info.n_input_regs;
2651 int locals = current_frame_info.n_local_regs;
2652 int outputs = current_frame_info.n_output_regs;
2654 for (i = 0; i < inputs; i++)
2655 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2656 for (i = 0; i < locals; i++)
2657 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2658 for (i = 0; i < outputs; i++)
2659 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
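/* A standalone sketch of the renaming above: the stacked registers r32..r127
   are handed out to the inputs first, then the locals, then the outputs.
   With, say, 2 inputs and 3 locals, in0 becomes r32, loc0 becomes r34 and
   out0 becomes r37 (the counts are illustrative).  */
#if 0
static int
stacked_regno_sketch (int n_inputs, int n_locals,
                      int cls /* 0=in, 1=loc, 2=out */, int idx)
{
  if (cls == 0)
    return 32 + idx;
  if (cls == 1)
    return 32 + n_inputs + idx;
  return 32 + n_inputs + n_locals + idx;
}
#endif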
2662 /* Set the frame pointer register name. The regnum is logically loc79,
2663 but of course we'll not have allocated that many locals. Rather than
2664 worrying about renumbering the existing rtxs, we adjust the name. */
2665 /* ??? This code means that we can never use one local register when
2666 there is a frame pointer. loc79 gets wasted in this case, as it is
2667 renamed to a register that will never be used. See also the try_locals
2668 code in find_gr_spill. */
2669 if (current_frame_info.reg_fp)
2671 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2672 reg_names[HARD_FRAME_POINTER_REGNUM]
2673 = reg_names[current_frame_info.reg_fp];
2674 reg_names[current_frame_info.reg_fp] = tmp;
2677 /* We don't need an alloc instruction if we've used no outputs or locals. */
2678 if (current_frame_info.n_local_regs == 0
2679 && current_frame_info.n_output_regs == 0
2680 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2681 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2683 /* If there is no alloc, but there are input registers used, then we
2684 need a .regstk directive. */
2685 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2686 ar_pfs_save_reg = NULL_RTX;
2690 current_frame_info.need_regstk = 0;
2692 if (current_frame_info.reg_save_ar_pfs)
2693 regno = current_frame_info.reg_save_ar_pfs;
2695 regno = next_scratch_gr_reg ();
2696 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2698 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2699 GEN_INT (current_frame_info.n_input_regs),
2700 GEN_INT (current_frame_info.n_local_regs),
2701 GEN_INT (current_frame_info.n_output_regs),
2702 GEN_INT (current_frame_info.n_rotate_regs)));
2703 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2706 /* Set up frame pointer, stack pointer, and spill iterators. */
2708 n_varargs = cfun->machine->n_varargs;
2709 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2710 stack_pointer_rtx, 0);
2712 if (frame_pointer_needed)
2714 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2715 RTX_FRAME_RELATED_P (insn) = 1;
2718 if (current_frame_info.total_size != 0)
2720 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2723 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2724 offset = frame_size_rtx;
2727 regno = next_scratch_gr_reg ();
2728 offset = gen_rtx_REG (DImode, regno);
2729 emit_move_insn (offset, frame_size_rtx);
2732 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2733 stack_pointer_rtx, offset));
2735 if (! frame_pointer_needed)
2737 RTX_FRAME_RELATED_P (insn) = 1;
2738 if (GET_CODE (offset) != CONST_INT)
2741 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2742 gen_rtx_SET (VOIDmode,
2744 gen_rtx_PLUS (DImode,
2751 /* ??? At this point we must generate a magic insn that appears to
2752 modify the stack pointer, the frame pointer, and all spill
2753 iterators. This would allow the most scheduling freedom. For
2754 now, just hard stop. */
2755 emit_insn (gen_blockage ());
2758 /* Must copy out ar.unat before doing any integer spills. */
2759 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2761 if (current_frame_info.reg_save_ar_unat)
2763 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2766 alt_regno = next_scratch_gr_reg ();
2767 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2768 current_frame_info.gr_used_mask |= 1 << alt_regno;
2771 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2772 insn = emit_move_insn (ar_unat_save_reg, reg);
2773 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2775 /* Even if we're not going to generate an epilogue, we still
2776 need to save the register so that EH works. */
2777 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2778 emit_insn (gen_prologue_use (ar_unat_save_reg));
2781 ar_unat_save_reg = NULL_RTX;
2783 /* Spill all varargs registers. Do this before spilling any GR registers,
2784 since we want the UNAT bits for the GR registers to override the UNAT
2785 bits from varargs, which we don't care about. */
2788 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2790 reg = gen_rtx_REG (DImode, regno);
2791 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2794 /* Locate the bottom of the register save area. */
2795 cfa_off = (current_frame_info.spill_cfa_off
2796 + current_frame_info.spill_size
2797 + current_frame_info.extra_spill_size);
2799 /* Save the predicate register block either in a register or in memory. */
2800 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2802 reg = gen_rtx_REG (DImode, PR_REG (0));
2803 if (current_frame_info.reg_save_pr != 0)
2805 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2806 insn = emit_move_insn (alt_reg, reg);
2808 /* ??? Denote pr spill/fill by a DImode move that modifies all
2809 64 hard registers. */
2810 RTX_FRAME_RELATED_P (insn) = 1;
2812 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2813 gen_rtx_SET (VOIDmode, alt_reg, reg),
2816 /* Even if we're not going to generate an epilogue, we still
2817 need to save the register so that EH works. */
2819 emit_insn (gen_prologue_use (alt_reg));
2823 alt_regno = next_scratch_gr_reg ();
2824 alt_reg = gen_rtx_REG (DImode, alt_regno);
2825 insn = emit_move_insn (alt_reg, reg);
2826 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2831 /* Handle AR regs in numerical order. All of them get special handling. */
2832 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2833 && current_frame_info.reg_save_ar_unat == 0)
2835 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2836 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2840 /* The alloc insn already copied ar.pfs into a general register. The
2841 only thing we have to do now is copy that register to a stack slot
2842 if we'd not allocated a local register for the job. */
2843 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2844 && current_frame_info.reg_save_ar_pfs == 0)
2846 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2847 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2851 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2853 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2854 if (current_frame_info.reg_save_ar_lc != 0)
2856 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2857 insn = emit_move_insn (alt_reg, reg);
2858 RTX_FRAME_RELATED_P (insn) = 1;
2860 /* Even if we're not going to generate an epilogue, we still
2861 need to save the register so that EH works. */
2863 emit_insn (gen_prologue_use (alt_reg));
2867 alt_regno = next_scratch_gr_reg ();
2868 alt_reg = gen_rtx_REG (DImode, alt_regno);
2869 emit_move_insn (alt_reg, reg);
2870 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2875 if (current_frame_info.reg_save_gp)
2877 insn = emit_move_insn (gen_rtx_REG (DImode,
2878 current_frame_info.reg_save_gp),
2879 pic_offset_table_rtx);
2880 /* We don't know for sure yet if this is actually needed, since
2881 we've not split the PIC call patterns. If all of the calls
2882 are indirect, and not followed by any uses of the gp, then
2883 this save is dead. Allow it to go away. */
2885 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2888 /* We should now be at the base of the gr/br/fr spill area. */
2889 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
2890 + current_frame_info.spill_size));
2892 /* Spill all general registers. */
2893 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2894 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2896 reg = gen_rtx_REG (DImode, regno);
2897 do_spill (gen_gr_spill, reg, cfa_off, reg);
2901 /* Handle BR0 specially -- it may be getting stored permanently in
2902 some GR register. */
2903 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2905 reg = gen_rtx_REG (DImode, BR_REG (0));
2906 if (current_frame_info.reg_save_b0 != 0)
2908 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2909 insn = emit_move_insn (alt_reg, reg);
2910 RTX_FRAME_RELATED_P (insn) = 1;
2912 /* Even if we're not going to generate an epilogue, we still
2913 need to save the register so that EH works. */
2915 emit_insn (gen_prologue_use (alt_reg));
2919 alt_regno = next_scratch_gr_reg ();
2920 alt_reg = gen_rtx_REG (DImode, alt_regno);
2921 emit_move_insn (alt_reg, reg);
2922 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2927 /* Spill the rest of the BR registers. */
2928 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2929 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2931 alt_regno = next_scratch_gr_reg ();
2932 alt_reg = gen_rtx_REG (DImode, alt_regno);
2933 reg = gen_rtx_REG (DImode, regno);
2934 emit_move_insn (alt_reg, reg);
2935 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2939 /* Align the frame and spill all FR registers. */
2940 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2941 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2943 gcc_assert (!(cfa_off & 15));
2944 reg = gen_rtx_REG (XFmode, regno);
2945 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2949 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
2951 finish_spill_pointers ();
2954 /* Called after register allocation to add any instructions needed for the
2955 epilogue. Using an epilogue insn is favored compared to putting all of the
2956 instructions in output_function_epilogue(), since it allows the scheduler
2957 to intermix instructions with the restores of the caller saved registers. In
2958 some cases, it might be necessary to emit a barrier instruction as the last
2959 insn to prevent such scheduling. */
2962 ia64_expand_epilogue (int sibcall_p)
2964 rtx insn, reg, alt_reg, ar_unat_save_reg;
2965 int regno, alt_regno, cfa_off;
2967 ia64_compute_frame_size (get_frame_size ());
2969 /* If there is a frame pointer, then we use it instead of the stack
2970 pointer, so that the stack pointer does not need to be valid when
2971 the epilogue starts. See EXIT_IGNORE_STACK. */
2972 if (frame_pointer_needed)
2973 setup_spill_pointers (current_frame_info.n_spilled,
2974 hard_frame_pointer_rtx, 0);
2976 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2977 current_frame_info.total_size);
2979 if (current_frame_info.total_size != 0)
2981 /* ??? At this point we must generate a magic insn that appears to
2982 modify the spill iterators and the frame pointer. This would
2983 allow the most scheduling freedom. For now, just hard stop. */
2984 emit_insn (gen_blockage ());
2987 /* Locate the bottom of the register save area. */
2988 cfa_off = (current_frame_info.spill_cfa_off
2989 + current_frame_info.spill_size
2990 + current_frame_info.extra_spill_size);
2992 /* Restore the predicate registers. */
2993 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2995 if (current_frame_info.reg_save_pr != 0)
2996 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2999 alt_regno = next_scratch_gr_reg ();
3000 alt_reg = gen_rtx_REG (DImode, alt_regno);
3001 do_restore (gen_movdi_x, alt_reg, cfa_off);
3004 reg = gen_rtx_REG (DImode, PR_REG (0));
3005 emit_move_insn (reg, alt_reg);
3008 /* Restore the application registers. */
3010 /* Load the saved unat from the stack, but do not restore it until
3011 after the GRs have been restored. */
3012 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3014 if (current_frame_info.reg_save_ar_unat != 0)
3016 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3019 alt_regno = next_scratch_gr_reg ();
3020 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3021 current_frame_info.gr_used_mask |= 1 << alt_regno;
3022 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3027 ar_unat_save_reg = NULL_RTX;
3029 if (current_frame_info.reg_save_ar_pfs != 0)
3031 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3032 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3033 emit_move_insn (reg, alt_reg);
3035 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3037 alt_regno = next_scratch_gr_reg ();
3038 alt_reg = gen_rtx_REG (DImode, alt_regno);
3039 do_restore (gen_movdi_x, alt_reg, cfa_off);
3041 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3042 emit_move_insn (reg, alt_reg);
3045 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3047 if (current_frame_info.reg_save_ar_lc != 0)
3048 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3051 alt_regno = next_scratch_gr_reg ();
3052 alt_reg = gen_rtx_REG (DImode, alt_regno);
3053 do_restore (gen_movdi_x, alt_reg, cfa_off);
3056 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3057 emit_move_insn (reg, alt_reg);
3060 /* We should now be at the base of the gr/br/fr spill area. */
3061 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3062 + current_frame_info.spill_size));
3064 /* The GP may be stored on the stack in the prologue, but it's
3065 never restored in the epilogue. Skip the stack slot. */
3066 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3069 /* Restore all general registers. */
3070 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3071 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3073 reg = gen_rtx_REG (DImode, regno);
3074 do_restore (gen_gr_restore, reg, cfa_off);
3078 /* Restore the branch registers. Handle B0 specially, as it may
3079 have gotten stored in some GR register. */
3080 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3082 if (current_frame_info.reg_save_b0 != 0)
3083 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3086 alt_regno = next_scratch_gr_reg ();
3087 alt_reg = gen_rtx_REG (DImode, alt_regno);
3088 do_restore (gen_movdi_x, alt_reg, cfa_off);
3091 reg = gen_rtx_REG (DImode, BR_REG (0));
3092 emit_move_insn (reg, alt_reg);
3095 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3096 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3098 alt_regno = next_scratch_gr_reg ();
3099 alt_reg = gen_rtx_REG (DImode, alt_regno);
3100 do_restore (gen_movdi_x, alt_reg, cfa_off);
3102 reg = gen_rtx_REG (DImode, regno);
3103 emit_move_insn (reg, alt_reg);
3106 /* Restore floating point registers. */
3107 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3108 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3110 gcc_assert (!(cfa_off & 15));
3111 reg = gen_rtx_REG (XFmode, regno);
3112 do_restore (gen_fr_restore_x, reg, cfa_off);
3116 /* Restore ar.unat for real. */
3117 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3119 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3120 emit_move_insn (reg, ar_unat_save_reg);
3123 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3125 finish_spill_pointers ();
3127 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3129 /* ??? At this point we must generate a magic insn that appears to
3130 modify the spill iterators, the stack pointer, and the frame
3131 pointer. This would allow the most scheduling freedom. For now, just hard stop. */
3133 emit_insn (gen_blockage ());
3136 if (cfun->machine->ia64_eh_epilogue_sp)
3137 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3138 else if (frame_pointer_needed)
3140 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3141 RTX_FRAME_RELATED_P (insn) = 1;
3143 else if (current_frame_info.total_size)
3145 rtx offset, frame_size_rtx;
3147 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3148 if (CONST_OK_FOR_I (current_frame_info.total_size))
3149 offset = frame_size_rtx;
3152 regno = next_scratch_gr_reg ();
3153 offset = gen_rtx_REG (DImode, regno);
3154 emit_move_insn (offset, frame_size_rtx);
3157 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3160 RTX_FRAME_RELATED_P (insn) = 1;
3161 if (GET_CODE (offset) != CONST_INT)
3164 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3165 gen_rtx_SET (VOIDmode,
3167 gen_rtx_PLUS (DImode,
3174 if (cfun->machine->ia64_eh_epilogue_bsp)
3175 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3178 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3181 int fp = GR_REG (2);
3182 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3183 first available call clobbered register. If there was a frame_pointer
3184 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3185 so we have to make sure we're using the string "r2" when emitting
3186 the register name for the assembler. */
3187 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3188 fp = HARD_FRAME_POINTER_REGNUM;
3190 /* We must emit an alloc to force the input registers to become output
3191 registers. Otherwise, if the callee tries to pass its parameters
3192 through to another call without an intervening alloc, then these values get lost. */
3194 /* ??? We don't need to preserve all input registers. We only need to
3195 preserve those input registers used as arguments to the sibling call.
3196 It is unclear how to compute that number here. */
3197 if (current_frame_info.n_input_regs != 0)
3199 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3200 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3201 const0_rtx, const0_rtx,
3202 n_inputs, const0_rtx));
3203 RTX_FRAME_RELATED_P (insn) = 1;
3208 /* Return 1 if br.ret can do all the work required to return from a function. */
3212 ia64_direct_return (void)
3214 if (reload_completed && ! frame_pointer_needed)
3216 ia64_compute_frame_size (get_frame_size ());
3218 return (current_frame_info.total_size == 0
3219 && current_frame_info.n_spilled == 0
3220 && current_frame_info.reg_save_b0 == 0
3221 && current_frame_info.reg_save_pr == 0
3222 && current_frame_info.reg_save_ar_pfs == 0
3223 && current_frame_info.reg_save_ar_unat == 0
3224 && current_frame_info.reg_save_ar_lc == 0);
3229 /* Return the magic cookie that we use to hold the return address
3230 during early compilation. */
3233 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3237 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3240 /* Split this value after reload, now that we know where the return
3241 address is saved. */
3244 ia64_split_return_addr_rtx (rtx dest)
3248 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3250 if (current_frame_info.reg_save_b0 != 0)
3251 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3257 /* Compute offset from CFA for BR0. */
3258 /* ??? Must be kept in sync with ia64_expand_prologue. */
3259 off = (current_frame_info.spill_cfa_off
3260 + current_frame_info.spill_size);
3261 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3262 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3265 /* Convert CFA offset to a register based offset. */
3266 if (frame_pointer_needed)
3267 src = hard_frame_pointer_rtx;
3270 src = stack_pointer_rtx;
3271 off += current_frame_info.total_size;
3274 /* Load address into scratch register. */
3275 if (CONST_OK_FOR_I (off))
3276 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3279 emit_move_insn (dest, GEN_INT (off));
3280 emit_insn (gen_adddi3 (dest, src, dest));
3283 src = gen_rtx_MEM (Pmode, dest);
3287 src = gen_rtx_REG (DImode, BR_REG (0));
3289 emit_move_insn (dest, src);
3293 ia64_hard_regno_rename_ok (int from, int to)
3295 /* Don't clobber any of the registers we reserved for the prologue. */
3296 if (to == current_frame_info.reg_fp
3297 || to == current_frame_info.reg_save_b0
3298 || to == current_frame_info.reg_save_pr
3299 || to == current_frame_info.reg_save_ar_pfs
3300 || to == current_frame_info.reg_save_ar_unat
3301 || to == current_frame_info.reg_save_ar_lc)
3304 if (from == current_frame_info.reg_fp
3305 || from == current_frame_info.reg_save_b0
3306 || from == current_frame_info.reg_save_pr
3307 || from == current_frame_info.reg_save_ar_pfs
3308 || from == current_frame_info.reg_save_ar_unat
3309 || from == current_frame_info.reg_save_ar_lc)
3312 /* Don't use output registers outside the register frame. */
3313 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3316 /* Retain even/oddness on predicate register pairs. */
3317 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3318 return (from & 1) == (to & 1);
3323 /* Target hook for assembling integer objects. Handle word-sized
3324 aligned objects and detect the cases when @fptr is needed. */
3327 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3329 if (size == POINTER_SIZE / BITS_PER_UNIT
3330 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3331 && GET_CODE (x) == SYMBOL_REF
3332 && SYMBOL_REF_FUNCTION_P (x))
3334 static const char * const directive[2][2] = {
3335 /* 64-bit pointer */ /* 32-bit pointer */
3336 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3337 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3339 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3340 output_addr_const (asm_out_file, x);
3341 fputs (")\n", asm_out_file);
3344 return default_assemble_integer (x, size, aligned_p);
3347 /* Emit the function prologue. */
3350 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3352 int mask, grsave, grsave_prev;
3354 if (current_frame_info.need_regstk)
3355 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3356 current_frame_info.n_input_regs,
3357 current_frame_info.n_local_regs,
3358 current_frame_info.n_output_regs,
3359 current_frame_info.n_rotate_regs);
3361 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3364 /* Emit the .prologue directive. */
3367 grsave = grsave_prev = 0;
3368 if (current_frame_info.reg_save_b0 != 0)
3371 grsave = grsave_prev = current_frame_info.reg_save_b0;
3373 if (current_frame_info.reg_save_ar_pfs != 0
3374 && (grsave_prev == 0
3375 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3378 if (grsave_prev == 0)
3379 grsave = current_frame_info.reg_save_ar_pfs;
3380 grsave_prev = current_frame_info.reg_save_ar_pfs;
3382 if (current_frame_info.reg_fp != 0
3383 && (grsave_prev == 0
3384 || current_frame_info.reg_fp == grsave_prev + 1))
3387 if (grsave_prev == 0)
3388 grsave = HARD_FRAME_POINTER_REGNUM;
3389 grsave_prev = current_frame_info.reg_fp;
3391 if (current_frame_info.reg_save_pr != 0
3392 && (grsave_prev == 0
3393 || current_frame_info.reg_save_pr == grsave_prev + 1))
3396 if (grsave_prev == 0)
3397 grsave = current_frame_info.reg_save_pr;
3400 if (mask && TARGET_GNU_AS)
3401 fprintf (file, "\t.prologue %d, %d\n", mask,
3402 ia64_dbx_register_number (grsave));
3404 fputs ("\t.prologue\n", file);
3406 /* Emit a .spill directive, if necessary, to relocate the base of
3407 the register spill area. */
3408 if (current_frame_info.spill_cfa_off != -16)
3409 fprintf (file, "\t.spill %ld\n",
3410 (long) (current_frame_info.spill_cfa_off
3411 + current_frame_info.spill_size));
3414 /* Emit the .body directive at the scheduled end of the prologue. */
3417 ia64_output_function_end_prologue (FILE *file)
3419 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3422 fputs ("\t.body\n", file);
3425 /* Emit the function epilogue. */
3428 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3429 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3433 if (current_frame_info.reg_fp)
3435 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3436 reg_names[HARD_FRAME_POINTER_REGNUM]
3437 = reg_names[current_frame_info.reg_fp];
3438 reg_names[current_frame_info.reg_fp] = tmp;
3440 if (! TARGET_REG_NAMES)
3442 for (i = 0; i < current_frame_info.n_input_regs; i++)
3443 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3444 for (i = 0; i < current_frame_info.n_local_regs; i++)
3445 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3446 for (i = 0; i < current_frame_info.n_output_regs; i++)
3447 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3450 current_frame_info.initialized = 0;
3454 ia64_dbx_register_number (int regno)
3456 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3457 from its home at loc79 to something inside the register frame. We
3458 must perform the same renumbering here for the debug info. */
3459 if (current_frame_info.reg_fp)
3461 if (regno == HARD_FRAME_POINTER_REGNUM)
3462 regno = current_frame_info.reg_fp;
3463 else if (regno == current_frame_info.reg_fp)
3464 regno = HARD_FRAME_POINTER_REGNUM;
3467 if (IN_REGNO_P (regno))
3468 return 32 + regno - IN_REG (0);
3469 else if (LOC_REGNO_P (regno))
3470 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3471 else if (OUT_REGNO_P (regno))
3472 return (32 + current_frame_info.n_input_regs
3473 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3479 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3481 rtx addr_reg, eight = GEN_INT (8);
3483 /* The Intel assembler requires that the global __ia64_trampoline symbol
3484 be declared explicitly */
3487 static bool declared_ia64_trampoline = false;
3489 if (!declared_ia64_trampoline)
3491 declared_ia64_trampoline = true;
3492 (*targetm.asm_out.globalize_label) (asm_out_file,
3493 "__ia64_trampoline");
3497 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3498 addr = convert_memory_address (Pmode, addr);
3499 fnaddr = convert_memory_address (Pmode, fnaddr);
3500 static_chain = convert_memory_address (Pmode, static_chain);
3502 /* Load up our iterator. */
3503 addr_reg = gen_reg_rtx (Pmode);
3504 emit_move_insn (addr_reg, addr);
3506 /* The first two words are the fake descriptor:
3507 __ia64_trampoline, ADDR+16. */
3508 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3509 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3510 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3512 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3513 copy_to_reg (plus_constant (addr, 16)));
3514 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3516 /* The third word is the target descriptor. */
3517 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3518 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3520 /* The fourth word is the static chain. */
3521 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
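/* A sketch of the 32-byte block being filled in above, as it lays out in
   memory on an LP64 target (the struct and field names are illustrative,
   not GCC types):  */
#if 0
struct fake_trampoline_sketch
{
  void *tramp_entry;   /* word 0: address of __ia64_trampoline          */
  void *tramp_data;    /* word 1: ADDR + 16, i.e. the address of word 2 */
  void *target_fn;     /* word 2: the target function descriptor        */
  void *static_chain;  /* word 3: the static chain value                */
};
#endif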
3524 /* Do any needed setup for a variadic function. CUM has not been updated
3525 for the last named argument which has type TYPE and mode MODE.
3527 We generate the actual spill instructions during prologue generation. */
3530 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3531 tree type, int * pretend_size,
3532 int second_time ATTRIBUTE_UNUSED)
3534 CUMULATIVE_ARGS next_cum = *cum;
3536 /* Skip the current argument. */
3537 ia64_function_arg_advance (&next_cum, mode, type, 1);
3539 if (next_cum.words < MAX_ARGUMENT_SLOTS)
3541 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3542 *pretend_size = n * UNITS_PER_WORD;
3543 cfun->machine->n_varargs = n;
3547 /* Check whether TYPE is a homogeneous floating point aggregate. If
3548 it is, return the mode of the floating point type that appears
3549 in all leafs. If it is not, return VOIDmode.
3551 An aggregate is a homogeneous floating point aggregate if all
3552 fields/elements in it have the same floating point type (e.g.,
3553 SFmode). 128-bit quad-precision floats are excluded.
3555 Variable sized aggregates should never arrive here, since we should
3556 have already decided to pass them by reference. Top-level zero-sized
3557 aggregates are excluded because our parallels crash the middle-end. */
3559 static enum machine_mode
3560 hfa_element_mode (tree type, bool nested)
3562 enum machine_mode element_mode = VOIDmode;
3563 enum machine_mode mode;
3564 enum tree_code code = TREE_CODE (type);
3565 int know_element_mode = 0;
3568 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3573 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3574 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3575 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3576 case LANG_TYPE: case FUNCTION_TYPE:
3579 /* Fortran complex types are supposed to be HFAs, so we need to handle
3580 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3583 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3584 && TYPE_MODE (type) != TCmode)
3585 return GET_MODE_INNER (TYPE_MODE (type));
3590 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3591 mode if this is contained within an aggregate. */
3592 if (nested && TYPE_MODE (type) != TFmode)
3593 return TYPE_MODE (type);
3598 return hfa_element_mode (TREE_TYPE (type), 1);
3602 case QUAL_UNION_TYPE:
3603 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3605 if (TREE_CODE (t) != FIELD_DECL)
3608 mode = hfa_element_mode (TREE_TYPE (t), 1);
3609 if (know_element_mode)
3611 if (mode != element_mode)
3614 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3618 know_element_mode = 1;
3619 element_mode = mode;
3622 return element_mode;
3625 /* If we reach here, we probably have some front-end specific type
3626 that the backend doesn't know about. This can happen via the
3627 aggregate_value_p call in init_function_start. All we can do is
3628 ignore unknown tree types. */
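/* A standalone toy model of the classification above: walk a (much
   simplified) type tree and report the one floating-point mode shared by
   every leaf, or "none" when the leaves disagree or a non-float leaf is
   found.  The representation below is hypothetical and only mirrors the
   recursion, not GCC's trees; it also ignores the TFmode and zero-size
   special cases handled above.  */
#if 0
enum toy_mode { TOY_NONE, TOY_SF, TOY_DF };

struct toy_type
{
  enum toy_mode scalar_mode;        /* non-TOY_NONE for a float scalar  */
  const struct toy_type *fields[8]; /* NULL-terminated, for aggregates  */
};

static enum toy_mode
toy_hfa_element_mode (const struct toy_type *t)
{
  enum toy_mode elem = TOY_NONE;
  int i;

  if (t->scalar_mode != TOY_NONE)
    return t->scalar_mode;

  for (i = 0; i < 8 && t->fields[i]; i++)
    {
      enum toy_mode m = toy_hfa_element_mode (t->fields[i]);
      if (m == TOY_NONE)
        return TOY_NONE;      /* a non-float leaf disqualifies the type */
      if (elem == TOY_NONE)
        elem = m;             /* remember the first leaf's mode         */
      else if (m != elem)
        return TOY_NONE;      /* mixed float modes: not homogeneous     */
    }
  return elem;
}
#endif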
3635 /* Return the number of words required to hold a quantity of TYPE and MODE
3636 when passed as an argument. */
3638 ia64_function_arg_words (tree type, enum machine_mode mode)
3642 if (mode == BLKmode)
3643 words = int_size_in_bytes (type);
3645 words = GET_MODE_SIZE (mode);
3647 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
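/* For instance, assuming UNITS_PER_WORD is 8 on this target, a 20-byte
   aggregate occupies (20 + 7) / 8 = 3 argument slots and an 8-byte scalar
   exactly one.  A hypothetical standalone equivalent:  */
#if 0
static int
arg_words_sketch (int byte_size)
{
  return (byte_size + 8 - 1) / 8;   /* round up to whole 8-byte slots */
}
#endif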
3650 /* Return the number of registers that should be skipped so the current
3651 argument (described by TYPE and WORDS) will be properly aligned.
3653 Integer and float arguments larger than 8 bytes start at the next
3654 even boundary. Aggregates larger than 8 bytes start at the next
3655 even boundary if the aggregate has 16 byte alignment. Note that
3656 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3657 but are still to be aligned in registers.
3659 ??? The ABI does not specify how to handle aggregates with
3660 alignment from 9 to 15 bytes, or greater than 16. We handle them
3661 all as if they had 16 byte alignment. Such aggregates can occur
3662 only if gcc extensions are used. */
3664 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3666 if ((cum->words & 1) == 0)
3670 && TREE_CODE (type) != INTEGER_TYPE
3671 && TREE_CODE (type) != REAL_TYPE)
3672 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
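/* A worked instance of the aggregate case above: with an odd number of slots
   already used (say cum->words == 3), a 16-byte-aligned aggregate gets an
   offset of one slot so that it starts on an even boundary (slot 4); when
   cum->words is even, no padding slot is needed.  The sketch below covers
   only that aggregate case, not the over-8-byte integer/float case.  */
#if 0
static int
arg_offset_sketch (int words_used, unsigned type_align_bits)
{
  if ((words_used & 1) == 0)
    return 0;                               /* already even: no padding    */
  return type_align_bits > 64 ? 1 : 0;      /* skip a slot if over-aligned */
}
#endif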
3677 /* Return rtx for register where argument is passed, or zero if it is passed on the stack. */
3679 /* ??? 128-bit quad-precision floats are always passed in general registers. */
3683 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3684 int named, int incoming)
3686 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3687 int words = ia64_function_arg_words (type, mode);
3688 int offset = ia64_function_arg_offset (cum, type, words);
3689 enum machine_mode hfa_mode = VOIDmode;
3691 /* If all argument slots are used, then it must go on the stack. */
3692 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3695 /* Check for and handle homogeneous FP aggregates. */
3697 hfa_mode = hfa_element_mode (type, 0);
3699 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3700 and unprototyped hfas are passed specially. */
3701 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3705 int fp_regs = cum->fp_regs;
3706 int int_regs = cum->words + offset;
3707 int hfa_size = GET_MODE_SIZE (hfa_mode);
3711 /* If prototyped, pass it in FR regs then GR regs.
3712 If not prototyped, pass it in both FR and GR regs.
3714 If this is an SFmode aggregate, then it is possible to run out of
3715 FR regs while GR regs are still left. In that case, we pass the
3716 remaining part in the GR regs. */
3718 /* Fill the FP regs. We do this always. We stop if we reach the end
3719 of the argument, the last FP register, or the last argument slot. */
3721 byte_size = ((mode == BLKmode)
3722 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3723 args_byte_size = int_regs * UNITS_PER_WORD;
3725 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3726 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3728 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3729 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3733 args_byte_size += hfa_size;
3737 /* If no prototype, then the whole thing must go in GR regs. */
3738 if (! cum->prototype)
3740 /* If this is an SFmode aggregate, then we might have some left over
3741 that needs to go in GR regs. */
3742 else if (byte_size != offset)
3743 int_regs += offset / UNITS_PER_WORD;
3745 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3747 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3749 enum machine_mode gr_mode = DImode;
3750 unsigned int gr_size;
3752 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3753 then this goes in a GR reg left adjusted/little endian, right
3754 adjusted/big endian. */
3755 /* ??? Currently this is handled wrong, because 4-byte hunks are
3756 always right adjusted/little endian. */
3759 /* If we have an even 4 byte hunk because the aggregate is a
3760 multiple of 4 bytes in size, then this goes in a GR reg right
3761 adjusted/little endian. */
3762 else if (byte_size - offset == 4)
3765 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3766 gen_rtx_REG (gr_mode, (basereg
3770 gr_size = GET_MODE_SIZE (gr_mode);
3772 if (gr_size == UNITS_PER_WORD
3773 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3775 else if (gr_size > UNITS_PER_WORD)
3776 int_regs += gr_size / UNITS_PER_WORD;
3778 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3781 /* Integral and aggregates go in general registers. If we have run out of
3782 FR registers, then FP values must also go in general registers. This can
3783 happen when we have a SFmode HFA. */
3784 else if (mode == TFmode || mode == TCmode
3785 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3787 int byte_size = ((mode == BLKmode)
3788 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3789 if (BYTES_BIG_ENDIAN
3790 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3791 && byte_size < UNITS_PER_WORD
3794 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3795 gen_rtx_REG (DImode,
3796 (basereg + cum->words
3799 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3802 return gen_rtx_REG (mode, basereg + cum->words + offset);
3806 /* If there is a prototype, then FP values go in a FR register when
3807 named, and in a GR register when unnamed. */
3808 else if (cum->prototype)
3811 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3812 /* In big-endian mode, an anonymous SFmode value must be represented
3813 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3814 the value into the high half of the general register. */
3815 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3816 return gen_rtx_PARALLEL (mode,
3818 gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (DImode, basereg + cum->words + offset),
3822 return gen_rtx_REG (mode, basereg + cum->words + offset);
3824 /* If there is no prototype, then FP values go in both FR and GR
3828 /* See comment above. */
3829 enum machine_mode inner_mode =
3830 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3832 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3833 gen_rtx_REG (mode, (FR_ARG_FIRST
3836 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (inner_mode,
3838 (basereg + cum->words
3842 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3846 /* Return number of bytes, at the beginning of the argument, that must be
3847 put in registers. 0 if the argument is entirely in registers or entirely on the stack. */
3851 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3852 tree type, bool named ATTRIBUTE_UNUSED)
3854 int words = ia64_function_arg_words (type, mode);
3855 int offset = ia64_function_arg_offset (cum, type, words);
3857 /* If all argument slots are used, then it must go on the stack. */
3858 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3861 /* It doesn't matter whether the argument goes in FR or GR regs. If
3862 it fits within the 8 argument slots, then it goes entirely in
3863 registers. If it extends past the last argument slot, then the rest
3864 goes on the stack. */
3866 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3869 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
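/* A worked instance, assuming 8 argument slots of 8 bytes each: a 4-slot
   argument whose first free slot is 5 and which needs one padding slot for
   an even boundary starts in slot 6, so only 2 slots remain in registers --
   (8 - 5 - 1) * 8 = 16 bytes in registers, the other 16 bytes on the stack.
   A hypothetical standalone equivalent:  */
#if 0
static int
partial_bytes_sketch (int words, int words_used, int offset)
{
  if (words_used + offset >= 8)
    return 0;                               /* entirely on the stack      */
  if (words + words_used + offset <= 8)
    return 0;                               /* entirely in registers      */
  return (8 - words_used - offset) * 8;     /* leading bytes in registers */
}
#endif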
3872 /* Update CUM to point after this argument. This is patterned after
3873 ia64_function_arg. */
3876 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3877 tree type, int named)
3879 int words = ia64_function_arg_words (type, mode);
3880 int offset = ia64_function_arg_offset (cum, type, words);
3881 enum machine_mode hfa_mode = VOIDmode;
3883 /* If all arg slots are already full, then there is nothing to do. */
3884 if (cum->words >= MAX_ARGUMENT_SLOTS)
3887 cum->words += words + offset;
3889 /* Check for and handle homogeneous FP aggregates. */
3891 hfa_mode = hfa_element_mode (type, 0);
3893 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3894 and unprototyped hfas are passed specially. */
3895 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3897 int fp_regs = cum->fp_regs;
3898 /* This is the original value of cum->words + offset. */
3899 int int_regs = cum->words - words;
3900 int hfa_size = GET_MODE_SIZE (hfa_mode);
3904 /* If prototyped, pass it in FR regs then GR regs.
3905 If not prototyped, pass it in both FR and GR regs.
3907 If this is an SFmode aggregate, then it is possible to run out of
3908 FR regs while GR regs are still left. In that case, we pass the
3909 remaining part in the GR regs. */
3911 /* Fill the FP regs. We do this always. We stop if we reach the end
3912 of the argument, the last FP register, or the last argument slot. */
3914 byte_size = ((mode == BLKmode)
3915 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3916 args_byte_size = int_regs * UNITS_PER_WORD;
3918 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3919 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3922 args_byte_size += hfa_size;
3926 cum->fp_regs = fp_regs;
3929 /* Integral and aggregates go in general registers. So do TFmode FP values.
3930 If we have run out of FR registers, then other FP values must also go in
3931 general registers. This can happen when we have a SFmode HFA. */
3932 else if (mode == TFmode || mode == TCmode
3933 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3934 cum->int_regs = cum->words;
3936 /* If there is a prototype, then FP values go in a FR register when
3937 named, and in a GR register when unnamed. */
3938 else if (cum->prototype)
3941 cum->int_regs = cum->words;
3943 /* ??? Complex types should not reach here. */
3944 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3946 /* If there is no prototype, then FP values go in both FR and GR
3950 /* ??? Complex types should not reach here. */
3951 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3952 cum->int_regs = cum->words;
3956 /* Arguments with alignment larger than 8 bytes start at the next even
3957 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
3958 even though their normal alignment is 8 bytes. See ia64_function_arg. */
3961 ia64_function_arg_boundary (enum machine_mode mode, tree type)
3964 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
3965 return PARM_BOUNDARY * 2;
3969 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
3970 return PARM_BOUNDARY * 2;
3972 return PARM_BOUNDARY;
3975 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
3976 return PARM_BOUNDARY * 2;
3978 return PARM_BOUNDARY;
3981 /* Variable sized types are passed by reference. */
3982 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3985 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3986 enum machine_mode mode ATTRIBUTE_UNUSED,
3987 tree type, bool named ATTRIBUTE_UNUSED)
3989 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3992 /* True if it is OK to do sibling call optimization for the specified
3993 call expression EXP. DECL will be the called function, or NULL if
3994 this is an indirect call. */
3996 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3998 /* We can't perform a sibcall if the current function has the syscall_linkage attribute. */
4000 if (lookup_attribute ("syscall_linkage",
4001 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4004 /* We must always return with our current GP. This means we can
4005 only sibcall to functions defined in the current module. */
4006 return decl && (*targetm.binds_local_p) (decl);
4010 /* Implement va_arg. */
4013 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4015 /* Variable sized types are passed by reference. */
4016 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4018 tree ptrtype = build_pointer_type (type);
4019 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4020 return build_va_arg_indirect_ref (addr);
4023 /* Aggregate arguments with alignment larger than 8 bytes start at
4024 the next even boundary. Integer and floating point arguments
4025 do so if they are larger than 8 bytes, whether or not they are
4026 also aligned larger than 8 bytes. */
4027 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4028 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4030 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4031 build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4032 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4033 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4034 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4035 gimplify_and_add (t, pre_p);
4038 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
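/* A minimal sketch of the va_list adjustment built above, assuming
   UNITS_PER_WORD is 8 so the add/and pair rounds the pointer up to a
   16-byte boundary (hypothetical helper working on a plain pointer):  */
#if 0
#include <stdint.h>

static void *
round_valist_16_sketch (void *p)
{
  return (void *) (((uintptr_t) p + 2 * 8 - 1) & -(uintptr_t) (2 * 8));
}
#endif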
4041 /* Return 1 if the function return value is returned in memory. Return 0 if it is in a register. */
4045 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4047 enum machine_mode mode;
4048 enum machine_mode hfa_mode;
4049 HOST_WIDE_INT byte_size;
4051 mode = TYPE_MODE (valtype);
4052 byte_size = GET_MODE_SIZE (mode);
4053 if (mode == BLKmode)
4055 byte_size = int_size_in_bytes (valtype);
4060 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4062 hfa_mode = hfa_element_mode (valtype, 0);
4063 if (hfa_mode != VOIDmode)
4065 int hfa_size = GET_MODE_SIZE (hfa_mode);
4067 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4072 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4078 /* Return rtx for register that holds the function return value. */
4081 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4083 enum machine_mode mode;
4084 enum machine_mode hfa_mode;
4086 mode = TYPE_MODE (valtype);
4087 hfa_mode = hfa_element_mode (valtype, 0);
4089 if (hfa_mode != VOIDmode)
4097 hfa_size = GET_MODE_SIZE (hfa_mode);
4098 byte_size = ((mode == BLKmode)
4099 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4101 for (i = 0; offset < byte_size; i++)
4103 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4104 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4108 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4110 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4111 return gen_rtx_REG (mode, FR_ARG_FIRST);
4114 bool need_parallel = false;
4116 /* In big-endian mode, we need to manage the layout of aggregates
4117 in the registers so that we get the bits properly aligned in
4118 the highpart of the registers. */
4119 if (BYTES_BIG_ENDIAN
4120 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4121 need_parallel = true;
4123 /* Something like struct S { long double x; char a[0] } is not an
4124 HFA structure, and therefore doesn't go in fp registers. But
4125 the middle-end will give it XFmode anyway, and XFmode values
4126 don't normally fit in integer registers. So we need to smuggle
4127 the value inside a parallel. */
4128 else if (mode == XFmode || mode == XCmode)
4129 need_parallel = true;
4139 bytesize = int_size_in_bytes (valtype);
4140 /* An empty PARALLEL is invalid here, but the return value
4141 doesn't matter for empty structs. */
4143 return gen_rtx_REG (mode, GR_RET_FIRST);
4144 for (i = 0; offset < bytesize; i++)
4146 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4147 gen_rtx_REG (DImode,
4150 offset += UNITS_PER_WORD;
4152 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4155 return gen_rtx_REG (mode, GR_RET_FIRST);
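/* Example of the HFA path above: a struct of four floats is an HFA with
   SFmode elements, so (assuming FR_ARG_FIRST is f8) it comes back as a
   PARALLEL of SFmode registers f8..f11 at byte offsets 0, 4, 8 and 12,
   rather than in the general return registers.  */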
4159 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4160 We need to emit DTP-relative relocations. */
4163 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4165 gcc_assert (size == 8);
4166 fputs ("\tdata8.ua\t@dtprel(", file);
4167 output_addr_const (file, x);
4171 /* Print a memory address as an operand to reference that memory location. */
4173 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4174 also call this from ia64_print_operand for memory addresses. */
4177 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4178 rtx address ATTRIBUTE_UNUSED)
4182 /* Print an operand to an assembler instruction.
4183 C Swap and print a comparison operator.
4184 D Print an FP comparison operator.
4185 E Print 32 - constant, for SImode shifts as extract.
4186 e Print 64 - constant, for DImode rotates.
4187 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4188 a floating point register emitted normally.
4189 I Invert a predicate register by adding 1.
4190 J Select the proper predicate register for a condition.
4191 j Select the inverse predicate register for a condition.
4192 O Append .acq for volatile load.
4193 P Postincrement of a MEM.
4194 Q Append .rel for volatile store.
4195 S Shift amount for shladd instruction.
4196 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4197 for Intel assembler.
4198 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4199 for Intel assembler.
4200 r Print register name, or constant 0 as r0. HP compatibility for
4202 v Print vector constant value as an 8-byte integer value. */
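/* A few worked examples of the codes above (illustrative): with operand
   (const_int 24), "%E" prints 8 (32 - 24) and "%e" prints 40 (64 - 24);
   with operand (const_int 8), "%S" prints 3, the shladd shift count
   (exact_log2 of the constant).  */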
4205 ia64_print_operand (FILE * file, rtx x, int code)
4212 /* Handled below. */
4217 enum rtx_code c = swap_condition (GET_CODE (x));
4218 fputs (GET_RTX_NAME (c), file);
4223 switch (GET_CODE (x))
4235 str = GET_RTX_NAME (GET_CODE (x));
4242 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4250 if (x == CONST0_RTX (GET_MODE (x)))
4251 str = reg_names [FR_REG (0)];
4252 else if (x == CONST1_RTX (GET_MODE (x)))
4253 str = reg_names [FR_REG (1)];
4256 gcc_assert (GET_CODE (x) == REG);
4257 str = reg_names [REGNO (x)];
4263 fputs (reg_names [REGNO (x) + 1], file);
4269 unsigned int regno = REGNO (XEXP (x, 0));
4270 if (GET_CODE (x) == EQ)
4274 fputs (reg_names [regno], file);
4279 if (MEM_VOLATILE_P (x))
4280 fputs(".acq", file);
4285 HOST_WIDE_INT value;
4287 switch (GET_CODE (XEXP (x, 0)))
4293 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4294 if (GET_CODE (x) == CONST_INT)
4298 gcc_assert (GET_CODE (x) == REG);
4299 fprintf (file, ", %s", reg_names[REGNO (x)]);
4305 value = GET_MODE_SIZE (GET_MODE (x));
4309 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4313 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4318 if (MEM_VOLATILE_P (x))
4319 fputs(".rel", file);
4323 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4327 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4329 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4335 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4337 const char *prefix = "0x";
4338 if (INTVAL (x) & 0x80000000)
4340 fprintf (file, "0xffffffff");
4343 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4349 /* If this operand is the constant zero, write it as register zero.
4350 Any register, zero, or CONST_INT value is OK here. */
4351 if (GET_CODE (x) == REG)
4352 fputs (reg_names[REGNO (x)], file);
4353 else if (x == CONST0_RTX (GET_MODE (x)))
4355 else if (GET_CODE (x) == CONST_INT)
4356 output_addr_const (file, x);
4358 output_operand_lossage ("invalid %%r value");
4362 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4363 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4370 /* For conditional branches, returns or calls, substitute
4371 sptk, dptk, dpnt, or spnt for %s. */
4372 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4375 int pred_val = INTVAL (XEXP (x, 0));
4377 /* Guess top and bottom 2% statically predicted. */
4378 if (pred_val < REG_BR_PROB_BASE / 50)
4380 else if (pred_val < REG_BR_PROB_BASE / 2)
4382 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4387 else if (GET_CODE (current_output_insn) == CALL_INSN)
4392 fputs (which, file);
4397 x = current_insn_predicate;
4400 unsigned int regno = REGNO (XEXP (x, 0));
4401 if (GET_CODE (x) == EQ)
4403 fprintf (file, "(%s) ", reg_names [regno]);
4408 output_operand_lossage ("ia64_print_operand: unknown code");
4412 switch (GET_CODE (x))
4414 /* This happens for the spill/restore instructions. */
4419 /* ... fall through ... */
4422 fputs (reg_names [REGNO (x)], file);
4427 rtx addr = XEXP (x, 0);
4428 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4429 addr = XEXP (addr, 0);
4430 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4435 output_addr_const (file, x);
4442 /* Compute a (partial) cost for rtx X. Return true if the complete
4443 cost has been computed, and false if subexpressions should be
4444 scanned. In either case, *TOTAL contains the cost result. */
4445 /* ??? This is incomplete. */
4448 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4456 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4459 if (CONST_OK_FOR_I (INTVAL (x)))
4461 else if (CONST_OK_FOR_J (INTVAL (x)))
4464 *total = COSTS_N_INSNS (1);
4467 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4470 *total = COSTS_N_INSNS (1);
4475 *total = COSTS_N_INSNS (1);
4481 *total = COSTS_N_INSNS (3);
4485 /* For multiplies wider than HImode, we have to go to the FPU,
4486 which normally involves copies. Plus there's the latency
4487 of the multiply itself, and the latency of the instructions to
4488 transfer integer regs to FP regs. */
4489 /* ??? Check for FP mode. */
4490 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4491 *total = COSTS_N_INSNS (10);
4493 *total = COSTS_N_INSNS (2);
4501 *total = COSTS_N_INSNS (1);
4508 /* We make divide expensive, so that divide-by-constant will be
4509 optimized to a multiply. */
4510 *total = COSTS_N_INSNS (60);
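/* E.g. at this cost a division by the constant 10 is expanded as a
   multiply-by-reciprocal sequence instead of a runtime divide, which suits
   IA-64 since it has no integer divide instruction at all.  */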
4518 /* Calculate the cost of moving data from a register in class FROM to
4519 one in class TO, using MODE. */
4522 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4525 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4526 if (to == ADDL_REGS)
4528 if (from == ADDL_REGS)
4531 /* All costs are symmetric, so reduce cases by putting the
4532 lower-numbered class as the destination.
4535 enum reg_class tmp = to;
4536 to = from, from = tmp;
4539 /* Moving from FR<->GR in XFmode must be more expensive than 2,
4540 so that we get secondary memory reloads. Between FR_REGS,
4541 we have to make this at least as expensive as MEMORY_MOVE_COST
4542 to avoid spectacularly poor register class preferencing. */
4545 if (to != GR_REGS || from != GR_REGS)
4546 return MEMORY_MOVE_COST (mode, to, 0);
4554 /* Moving between PR registers takes two insns. */
4555 if (from == PR_REGS)
4557 /* Moving between PR and anything but GR is impossible. */
4558 if (from != GR_REGS)
4559 return MEMORY_MOVE_COST (mode, to, 0);
4563 /* Moving between BR and anything but GR is impossible. */
4564 if (from != GR_REGS && from != GR_AND_BR_REGS)
4565 return MEMORY_MOVE_COST (mode, to, 0);
4570 /* Moving between AR and anything but GR is impossible. */
4571 if (from != GR_REGS)
4572 return MEMORY_MOVE_COST (mode, to, 0);
4577 case GR_AND_FR_REGS:
4578 case GR_AND_BR_REGS:
4589 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
4590 to use when copying X into that class. */
4593 ia64_preferred_reload_class (rtx x, enum reg_class class)
4598 /* Don't allow volatile mem reloads into floating point registers.
4599 This is defined to force reload to choose the r/m case instead
4600 of the f/f case when reloading (set (reg fX) (mem/v)). */
4601 if (MEM_P (x) && MEM_VOLATILE_P (x))
4604 /* Force all unrecognized constants into the constant pool. */
4622 /* This function returns the register class required for a secondary
4623 register when copying between one of the registers in CLASS, and X,
4624 using MODE. A return value of NO_REGS means that no secondary register
4628 ia64_secondary_reload_class (enum reg_class class,
4629 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4633 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4634 regno = true_regnum (x);
4641 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4642 interaction. We end up with two pseudos with overlapping lifetimes
4643 both of which are equiv to the same constant, and both of which need
4644 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4645 changes depending on the path length, which means the qty_first_reg
4646 check in make_regs_eqv can give different answers at different times.
4647 At some point I'll probably need a reload_indi pattern to handle
4650 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4651 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4652 non-general registers for good measure. */
4653 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4656 /* This is needed if a pseudo used as a call_operand gets spilled to a
4658 if (GET_CODE (x) == MEM)
4663 /* Need to go through general registers to get to other class regs. */
4664 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4667 /* This can happen when a paradoxical subreg is an operand to the
4669 /* ??? This shouldn't be necessary after instruction scheduling is
4670 enabled, because paradoxical subregs are not accepted by
4671 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4672 stop the paradoxical subreg stupidity in the *_operand functions
4674 if (GET_CODE (x) == MEM
4675 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4676 || GET_MODE (x) == QImode))
4679 /* This can happen because of the ior/and/etc patterns that accept FP
4680 registers as operands. If the third operand is a constant, then it
4681 needs to be reloaded into a FP register. */
4682 if (GET_CODE (x) == CONST_INT)
4685 /* This can happen because of register elimination in a muldi3 insn.
4686 E.g. `26107 * (unsigned long)&u'. */
4687 if (GET_CODE (x) == PLUS)
4692 /* ??? This happens if we cse/gcse a BImode value across a call,
4693 and the function has a nonlocal goto. This is because global
4694 does not allocate call crossing pseudos to hard registers when
4695 current_function_has_nonlocal_goto is true. This is relatively
4696 common for C++ programs that use exceptions. To reproduce,
4697 return NO_REGS and compile libstdc++. */
4698 if (GET_CODE (x) == MEM)
4701 /* This can happen when we take a BImode subreg of a DImode value,
4702 and that DImode value winds up in some non-GR register. */
4703 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4715 /* Emit text to declare externally defined variables and functions, because
4716 the Intel assembler does not support undefined externals. */
4719 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4721 int save_referenced;
4723 /* GNU as does not need anything here, but the HP linker does need
4724 something for external functions. */
4728 || TREE_CODE (decl) != FUNCTION_DECL
4729 || strstr (name, "__builtin_") == name))
4732 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4733 the linker when we do this, so we need to be careful not to do this for
4734 builtin functions which have no library equivalent. Unfortunately, we
4735 can't tell here whether or not a function will actually be called by
4736 expand_expr, so we pull in library functions even if we may not need
4738 if (! strcmp (name, "__builtin_next_arg")
4739 || ! strcmp (name, "alloca")
4740 || ! strcmp (name, "__builtin_constant_p")
4741 || ! strcmp (name, "__builtin_args_info"))
4745 ia64_hpux_add_extern_decl (decl);
4748 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4750 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4751 if (TREE_CODE (decl) == FUNCTION_DECL)
4752 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4753 (*targetm.asm_out.globalize_label) (file, name);
4754 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4758 /* Parse the -mfixed-range= option string. */
4761 fix_range (const char *const_str)
4764 char *str, *dash, *comma;
4766 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4767 REG2 are either register names or register numbers. The effect
4768 of this option is to mark the registers in the range from REG1 to
4769 REG2 as ``fixed'' so they won't be used by the compiler. This is
4770 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4772 i = strlen (const_str);
4773 str = (char *) alloca (i + 1);
4774 memcpy (str, const_str, i + 1);
4778 dash = strchr (str, '-');
4781 warning (0, "value of -mfixed-range must have form REG1-REG2");
4786 comma = strchr (dash + 1, ',');
4790 first = decode_reg_name (str);
4793 warning (0, "unknown register name: %s", str);
4797 last = decode_reg_name (dash + 1);
4800 warning (0, "unknown register name: %s", dash + 1);
4808 warning (0, "%s-%s is an empty range", str, dash + 1);
4812 for (i = first; i <= last; ++i)
4813 fixed_regs[i] = call_used_regs[i] = 1;
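/* Usage sketch: -mfixed-range=f32-f127 runs the loop above over f32..f127,
   and several such ranges may be given separated by commas, e.g.
   -mfixed-range=f32-f127,f12-f15 (the second range is purely illustrative).  */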
4823 /* Implement TARGET_HANDLE_OPTION. */
4826 ia64_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
4830 case OPT_mfixed_range_:
4834 case OPT_mtls_size_:
4837 unsigned long tmp = strtoul (arg, &end, 10);
4838 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4839 error ("bad value %<%s%> for -mtls-size= switch", arg);
4841 ia64_tls_size = tmp;
4849 const char *name; /* processor name or nickname. */
4850 enum processor_type processor;
4852 const processor_alias_table[] =
4854 {"itanium", PROCESSOR_ITANIUM},
4855 {"itanium1", PROCESSOR_ITANIUM},
4856 {"merced", PROCESSOR_ITANIUM},
4857 {"itanium2", PROCESSOR_ITANIUM2},
4858 {"mckinley", PROCESSOR_ITANIUM2},
4860 int const pta_size = ARRAY_SIZE (processor_alias_table);
4863 for (i = 0; i < pta_size; i++)
4864 if (!strcmp (arg, processor_alias_table[i].name))
4866 ia64_tune = processor_alias_table[i].processor;
4870 error ("bad value %<%s%> for -mtune= switch", arg);
4879 /* Handle TARGET_OPTIONS switches. */
4882 ia64_override_options (void)
4884 if (TARGET_AUTO_PIC)
4885 target_flags |= MASK_CONST_GP;
4887 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
4889 warning (0, "not yet implemented: latency-optimized inline square root");
4890 TARGET_INLINE_SQRT = INL_MAX_THR;
4893 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4894 flag_schedule_insns_after_reload = 0;
4896 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4898 init_machine_status = ia64_init_machine_status;
4901 static struct machine_function *
4902 ia64_init_machine_status (void)
4904 return ggc_alloc_cleared (sizeof (struct machine_function));
4907 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4908 static enum attr_type ia64_safe_type (rtx);
4910 static enum attr_itanium_class
4911 ia64_safe_itanium_class (rtx insn)
4913 if (recog_memoized (insn) >= 0)
4914 return get_attr_itanium_class (insn);
4916 return ITANIUM_CLASS_UNKNOWN;
4919 static enum attr_type
4920 ia64_safe_type (rtx insn)
4922 if (recog_memoized (insn) >= 0)
4923 return get_attr_type (insn);
4925 return TYPE_UNKNOWN;
4928 /* The following collection of routines emit instruction group stop bits as
4929 necessary to avoid dependencies. */
4931 /* Need to track some additional registers as far as serialization is
4932 concerned so we can properly handle br.call and br.ret. We could
4933 make these registers visible to gcc, but since these registers are
4934 never explicitly used in gcc generated code, it seems wasteful to
4935 do so (plus it would make the call and return patterns needlessly
4937 #define REG_RP (BR_REG (0))
4938 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4939 /* This is used for volatile asms which may require a stop bit immediately
4940 before and after them. */
4941 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4942 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4943 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4945 /* For each register, we keep track of how it has been written in the
4946 current instruction group.
4948 If a register is written unconditionally (no qualifying predicate),
4949 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4951 If a register is written if its qualifying predicate P is true, we
4952 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4953 may be written again by the complement of P (P^1) and when this happens,
4954 WRITE_COUNT gets set to 2.
4956 The result of this is that whenever an insn attempts to write a register
4957 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4959 If a predicate register is written by a floating-point insn, we set
4960 WRITTEN_BY_FP to true.
4962 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4963 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
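/* A short illustration of the rules above (it leans on the assumption, noted
   further down, that P and P+1 form a complementary pair for even P):

       (p6) mov r8 = r9       first write: WRITE_COUNT = 1, FIRST_PRED = p6
       (p7) mov r8 = r10      complement of p6: no barrier, WRITE_COUNT = 2
       (p8) mov r8 = r11      WRITE_COUNT is already 2, so a stop bit (;;)
                              must be emitted before this insn.  */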
4965 struct reg_write_state
4967 unsigned int write_count : 2;
4968 unsigned int first_pred : 16;
4969 unsigned int written_by_fp : 1;
4970 unsigned int written_by_and : 1;
4971 unsigned int written_by_or : 1;
4974 /* Cumulative info for the current instruction group. */
4975 struct reg_write_state rws_sum[NUM_REGS];
4976 /* Info for the current instruction. This gets copied to rws_sum after a
4977 stop bit is emitted. */
4978 struct reg_write_state rws_insn[NUM_REGS];
4980 /* Indicates whether this is the first instruction after a stop bit,
4981 in which case we don't need another stop bit. Without this,
4982 ia64_variable_issue will die when scheduling an alloc. */
4983 static int first_instruction;
4985 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4986 RTL for one instruction. */
4989 unsigned int is_write : 1; /* Is register being written? */
4990 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4991 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4992 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4993 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4994 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4997 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4998 static int rws_access_regno (int, struct reg_flags, int);
4999 static int rws_access_reg (rtx, struct reg_flags, int);
5000 static void update_set_flags (rtx, struct reg_flags *);
5001 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5002 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5003 static void init_insn_group_barriers (void);
5004 static int group_barrier_needed (rtx);
5005 static int safe_group_barrier_needed (rtx);
5007 /* Update *RWS for REGNO, which is being written by the current instruction,
5008 with predicate PRED, and associated register flags in FLAGS. */
5011 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5014 rws[regno].write_count++;
5016 rws[regno].write_count = 2;
5017 rws[regno].written_by_fp |= flags.is_fp;
5018 /* ??? Not tracking and/or across differing predicates. */
5019 rws[regno].written_by_and = flags.is_and;
5020 rws[regno].written_by_or = flags.is_or;
5021 rws[regno].first_pred = pred;
5024 /* Handle an access to register REGNO of type FLAGS using predicate register
5025 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
5026 a dependency with an earlier instruction in the same group. */
5029 rws_access_regno (int regno, struct reg_flags flags, int pred)
5031 int need_barrier = 0;
5033 gcc_assert (regno < NUM_REGS);
5035 if (! PR_REGNO_P (regno))
5036 flags.is_and = flags.is_or = 0;
5042 /* One insn writes same reg multiple times? */
5043 gcc_assert (!rws_insn[regno].write_count);
5045 /* Update info for current instruction. */
5046 rws_update (rws_insn, regno, flags, pred);
5047 write_count = rws_sum[regno].write_count;
5049 switch (write_count)
5052 /* The register has not been written yet. */
5053 rws_update (rws_sum, regno, flags, pred);
5057 /* The register has been written via a predicate. If this is
5058 not a complementary predicate, then we need a barrier. */
5059 /* ??? This assumes that P and P+1 are always complementary
5060 predicates for P even. */
5061 if (flags.is_and && rws_sum[regno].written_by_and)
5063 else if (flags.is_or && rws_sum[regno].written_by_or)
5065 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5067 rws_update (rws_sum, regno, flags, pred);
5071 /* The register has been unconditionally written already. We
5073 if (flags.is_and && rws_sum[regno].written_by_and)
5075 else if (flags.is_or && rws_sum[regno].written_by_or)
5079 rws_sum[regno].written_by_and = flags.is_and;
5080 rws_sum[regno].written_by_or = flags.is_or;
5089 if (flags.is_branch)
5091 /* Branches have several RAW exceptions that allow us to avoid
5094 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5095 /* RAW dependencies on branch regs are permissible as long
5096 as the writer is a non-branch instruction. Since we
5097 never generate code that uses a branch register written
5098 by a branch instruction, handling this case is
5102 if (REGNO_REG_CLASS (regno) == PR_REGS
5103 && ! rws_sum[regno].written_by_fp)
5104 /* The predicates of a branch are available within the
5105 same insn group as long as the predicate was written by
5106 something other than a floating-point instruction. */
5110 if (flags.is_and && rws_sum[regno].written_by_and)
5112 if (flags.is_or && rws_sum[regno].written_by_or)
5115 switch (rws_sum[regno].write_count)
5118 /* The register has not been written yet. */
5122 /* The register has been written via a predicate. If this is
5123 not a complementary predicate, then we need a barrier. */
5124 /* ??? This assumes that P and P+1 are always complementary
5125 predicates for P even. */
5126 if ((rws_sum[regno].first_pred ^ 1) != pred)
5131 /* The register has been unconditionally written already. We
5141 return need_barrier;
5145 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5147 int regno = REGNO (reg);
5148 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5151 return rws_access_regno (regno, flags, pred);
5154 int need_barrier = 0;
5156 need_barrier |= rws_access_regno (regno + n, flags, pred);
5157 return need_barrier;
5161 /* Examine X, which is a SET rtx, and update the register flags
5162 stored in *PFLAGS. */
5165 update_set_flags (rtx x, struct reg_flags *pflags)
5167 rtx src = SET_SRC (x);
5169 switch (GET_CODE (src))
5175 if (SET_DEST (x) == pc_rtx)
5176 /* X is a conditional branch. */
5180 /* X is a conditional move. */
5181 rtx cond = XEXP (src, 0);
5182 cond = XEXP (cond, 0);
5184 /* We always split conditional moves into COND_EXEC patterns, so the
5185 only pattern that can reach here is doloop_end_internal. We don't
5186 need to do anything special for this pattern. */
5187 gcc_assert (GET_CODE (cond) == REG && REGNO (cond) == AR_LC_REGNUM);
5192 if (COMPARISON_P (src)
5193 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5194 /* Set pflags->is_fp to 1 so that we know we're dealing
5195 with a floating point comparison when processing the
5196 destination of the SET. */
5199 /* Discover if this is a parallel comparison. We only handle
5200 and.orcm and or.andcm at present, since we must retain a
5201 strict inverse on the predicate pair. */
5202 else if (GET_CODE (src) == AND)
5204 else if (GET_CODE (src) == IOR)
5211 /* Subroutine of rtx_needs_barrier; this function determines whether the
5212 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5213 are as in rtx_needs_barrier. */
5217 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5219 int need_barrier = 0;
5221 rtx src = SET_SRC (x);
5223 if (GET_CODE (src) == CALL)
5224 /* We don't need to worry about the result registers that
5225 get written by subroutine call. */
5226 return rtx_needs_barrier (src, flags, pred);
5227 else if (SET_DEST (x) == pc_rtx)
5229 /* X is a conditional branch. */
5230 /* ??? This seems redundant, as the caller sets this bit for
5232 flags.is_branch = 1;
5233 return rtx_needs_barrier (src, flags, pred);
5236 need_barrier = rtx_needs_barrier (src, flags, pred);
5239 if (GET_CODE (dst) == ZERO_EXTRACT)
5241 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5242 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5243 dst = XEXP (dst, 0);
5245 return need_barrier;
5248 /* Handle an access to rtx X of type FLAGS using predicate register
5249 PRED. Return 1 if this access creates a dependency with an earlier
5250 instruction in the same group. */
5253 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5256 int is_complemented = 0;
5257 int need_barrier = 0;
5258 const char *format_ptr;
5259 struct reg_flags new_flags;
5267 switch (GET_CODE (x))
5270 update_set_flags (x, &new_flags);
5271 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5272 if (GET_CODE (SET_SRC (x)) != CALL)
5274 new_flags.is_write = 1;
5275 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5280 new_flags.is_write = 0;
5281 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5283 /* Avoid multiple register writes, in case this is a pattern with
5284 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5285 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5287 new_flags.is_write = 1;
5288 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5289 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5290 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5295 /* X is a predicated instruction. */
5297 cond = COND_EXEC_TEST (x);
5299 need_barrier = rtx_needs_barrier (cond, flags, 0);
5301 if (GET_CODE (cond) == EQ)
5302 is_complemented = 1;
5303 cond = XEXP (cond, 0);
5304 gcc_assert (GET_CODE (cond) == REG
5305 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5306 pred = REGNO (cond);
5307 if (is_complemented)
5310 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5311 return need_barrier;
5315 /* Clobber & use are for earlier compiler-phases only. */
5320 /* We always emit stop bits for traditional asms. We emit stop bits
5321 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5322 if (GET_CODE (x) != ASM_OPERANDS
5323 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5325 /* Avoid writing the register multiple times if we have multiple
5326 asm outputs. This avoids a failure in rws_access_reg. */
5327 if (! rws_insn[REG_VOLATILE].write_count)
5329 new_flags.is_write = 1;
5330 rws_access_regno (REG_VOLATILE, new_flags, pred);
5335 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5336 We cannot just fall through here since then we would be confused
5337 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5338 traditional asms unlike their normal usage. */
5340 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5341 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5346 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5348 rtx pat = XVECEXP (x, 0, i);
5349 switch (GET_CODE (pat))
5352 update_set_flags (pat, &new_flags);
5353 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5359 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5370 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5372 rtx pat = XVECEXP (x, 0, i);
5373 if (GET_CODE (pat) == SET)
5375 if (GET_CODE (SET_SRC (pat)) != CALL)
5377 new_flags.is_write = 1;
5378 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5382 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5383 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5388 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5391 if (REGNO (x) == AR_UNAT_REGNUM)
5393 for (i = 0; i < 64; ++i)
5394 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5397 need_barrier = rws_access_reg (x, flags, pred);
5401 /* Find the regs used in memory address computation. */
5402 new_flags.is_write = 0;
5403 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5406 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
5407 case SYMBOL_REF: case LABEL_REF: case CONST:
5410 /* Operators with side-effects. */
5411 case POST_INC: case POST_DEC:
5412 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5414 new_flags.is_write = 0;
5415 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5416 new_flags.is_write = 1;
5417 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5421 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5423 new_flags.is_write = 0;
5424 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5425 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5426 new_flags.is_write = 1;
5427 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5430 /* Handle common unary and binary ops for efficiency. */
5431 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5432 case MOD: case UDIV: case UMOD: case AND: case IOR:
5433 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5434 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5435 case NE: case EQ: case GE: case GT: case LE:
5436 case LT: case GEU: case GTU: case LEU: case LTU:
5437 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5438 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5441 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5442 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5443 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5444 case SQRT: case FFS: case POPCOUNT:
5445 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5449 /* VEC_SELECT's second argument is a PARALLEL with integers that
5450 describe the elements selected. On ia64, those integers are
5451 always constants. Avoid walking the PARALLEL so that we don't
5452 get confused with "normal" parallels and then die. */
5453 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5457 switch (XINT (x, 1))
5459 case UNSPEC_LTOFF_DTPMOD:
5460 case UNSPEC_LTOFF_DTPREL:
5462 case UNSPEC_LTOFF_TPREL:
5464 case UNSPEC_PRED_REL_MUTEX:
5465 case UNSPEC_PIC_CALL:
5467 case UNSPEC_FETCHADD_ACQ:
5468 case UNSPEC_BSP_VALUE:
5469 case UNSPEC_FLUSHRS:
5470 case UNSPEC_BUNDLE_SELECTOR:
5473 case UNSPEC_GR_SPILL:
5474 case UNSPEC_GR_RESTORE:
5476 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5477 HOST_WIDE_INT bit = (offset >> 3) & 63;
5479 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5480 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5481 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5486 case UNSPEC_FR_SPILL:
5487 case UNSPEC_FR_RESTORE:
5488 case UNSPEC_GETF_EXP:
5489 case UNSPEC_SETF_EXP:
5491 case UNSPEC_FR_SQRT_RECIP_APPROX:
5492 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5495 case UNSPEC_FR_RECIP_APPROX:
5497 case UNSPEC_COPYSIGN:
5498 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5499 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5502 case UNSPEC_CMPXCHG_ACQ:
5503 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5504 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5512 case UNSPEC_VOLATILE:
5513 switch (XINT (x, 1))
5516 /* Alloc must always be the first instruction of a group.
5517 We force this by always returning true. */
5518 /* ??? We might get better scheduling if we explicitly check for
5519 input/local/output register dependencies, and modify the
5520 scheduler so that alloc is always reordered to the start of
5521 the current group. We could then eliminate all of the
5522 first_instruction code. */
5523 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5525 new_flags.is_write = 1;
5526 rws_access_regno (REG_AR_CFM, new_flags, pred);
5529 case UNSPECV_SET_BSP:
5533 case UNSPECV_BLOCKAGE:
5534 case UNSPECV_INSN_GROUP_BARRIER:
5536 case UNSPECV_PSAC_ALL:
5537 case UNSPECV_PSAC_NORMAL:
5546 new_flags.is_write = 0;
5547 need_barrier = rws_access_regno (REG_RP, flags, pred);
5548 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5550 new_flags.is_write = 1;
5551 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5552 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5556 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5557 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5558 switch (format_ptr[i])
5560 case '0': /* unused field */
5561 case 'i': /* integer */
5562 case 'n': /* note */
5563 case 'w': /* wide integer */
5564 case 's': /* pointer to string */
5565 case 'S': /* optional pointer to string */
5569 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5574 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5575 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5584 return need_barrier;
5587 /* Clear out the state for group_barrier_needed at the start of a
5588 sequence of insns. */
5591 init_insn_group_barriers (void)
5593 memset (rws_sum, 0, sizeof (rws_sum));
5594 first_instruction = 1;
5597 /* Given the current state, determine whether a group barrier (a stop bit) is
5598 necessary before INSN. Return nonzero if so. This modifies the state to
5599 include the effects of INSN as a side-effect. */
5602 group_barrier_needed (rtx insn)
5605 int need_barrier = 0;
5606 struct reg_flags flags;
5608 memset (&flags, 0, sizeof (flags));
5609 switch (GET_CODE (insn))
5615 /* A barrier doesn't imply an instruction group boundary. */
5619 memset (rws_insn, 0, sizeof (rws_insn));
5623 flags.is_branch = 1;
5624 flags.is_sibcall = SIBLING_CALL_P (insn);
5625 memset (rws_insn, 0, sizeof (rws_insn));
5627 /* Don't bundle a call following another call. */
5628 if ((pat = prev_active_insn (insn))
5629 && GET_CODE (pat) == CALL_INSN)
5635 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5639 flags.is_branch = 1;
5641 /* Don't bundle a jump following a call. */
5642 if ((pat = prev_active_insn (insn))
5643 && GET_CODE (pat) == CALL_INSN)
5651 if (GET_CODE (PATTERN (insn)) == USE
5652 || GET_CODE (PATTERN (insn)) == CLOBBER)
5653 /* Don't care about USE and CLOBBER "insns"---those are used to
5654 indicate to the optimizer that it shouldn't get rid of
5655 certain operations. */
5658 pat = PATTERN (insn);
5660 /* Ug. Hack hacks hacked elsewhere. */
5661 switch (recog_memoized (insn))
5663 /* We play dependency tricks with the epilogue in order
5664 to get proper schedules. Undo this for dv analysis. */
5665 case CODE_FOR_epilogue_deallocate_stack:
5666 case CODE_FOR_prologue_allocate_stack:
5667 pat = XVECEXP (pat, 0, 0);
5670 /* The pattern we use for br.cloop confuses the code above.
5671 The second element of the vector is representative. */
5672 case CODE_FOR_doloop_end_internal:
5673 pat = XVECEXP (pat, 0, 1);
5676 /* Doesn't generate code. */
5677 case CODE_FOR_pred_rel_mutex:
5678 case CODE_FOR_prologue_use:
5685 memset (rws_insn, 0, sizeof (rws_insn));
5686 need_barrier = rtx_needs_barrier (pat, flags, 0);
5688 /* Check to see if the previous instruction was a volatile
5691 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5698 if (first_instruction && INSN_P (insn)
5699 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5700 && GET_CODE (PATTERN (insn)) != USE
5701 && GET_CODE (PATTERN (insn)) != CLOBBER)
5704 first_instruction = 0;
5707 return need_barrier;
5710 /* Like group_barrier_needed, but do not clobber the current state. */
5713 safe_group_barrier_needed (rtx insn)
5715 struct reg_write_state rws_saved[NUM_REGS];
5716 int saved_first_instruction;
5719 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5720 saved_first_instruction = first_instruction;
5722 t = group_barrier_needed (insn);
5724 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5725 first_instruction = saved_first_instruction;
5730 /* Scan the current function and insert stop bits as necessary to
5731 eliminate dependencies. This function assumes that a final
5732 instruction scheduling pass has been run which has already
5733 inserted most of the necessary stop bits. This function only
5734 inserts new ones at basic block boundaries, since these are
5735 invisible to the scheduler. */
5738 emit_insn_group_barriers (FILE *dump)
5742 int insns_since_last_label = 0;
5744 init_insn_group_barriers ();
5746 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5748 if (GET_CODE (insn) == CODE_LABEL)
5750 if (insns_since_last_label)
5752 insns_since_last_label = 0;
5754 else if (GET_CODE (insn) == NOTE
5755 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5757 if (insns_since_last_label)
5759 insns_since_last_label = 0;
5761 else if (GET_CODE (insn) == INSN
5762 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5763 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5765 init_insn_group_barriers ();
5768 else if (INSN_P (insn))
5770 insns_since_last_label = 1;
5772 if (group_barrier_needed (insn))
5777 fprintf (dump, "Emitting stop before label %d\n",
5778 INSN_UID (last_label));
5779 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5782 init_insn_group_barriers ();
5790 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5791 This function has to emit all necessary group barriers. */
5794 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5798 init_insn_group_barriers ();
5800 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5802 if (GET_CODE (insn) == BARRIER)
5804 rtx last = prev_active_insn (insn);
5808 if (GET_CODE (last) == JUMP_INSN
5809 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5810 last = prev_active_insn (last);
5811 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5812 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5814 init_insn_group_barriers ();
5816 else if (INSN_P (insn))
5818 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5819 init_insn_group_barriers ();
5820 else if (group_barrier_needed (insn))
5822 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5823 init_insn_group_barriers ();
5824 group_barrier_needed (insn);
5832 /* Instruction scheduling support. */
5834 #define NR_BUNDLES 10
5836 /* A list of names of all available bundles. */
5838 static const char *bundle_name [NR_BUNDLES] =
5844 #if NR_BUNDLES == 10
5854 /* Nonzero if we should insert stop bits into the schedule. */
5856 int ia64_final_schedule = 0;
5858 /* Codes of the corresponding queried units: */
5860 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5861 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5863 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5864 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5866 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5868 /* The following variable value is an insn group barrier. */
5870 static rtx dfa_stop_insn;
5872 /* The following variable value is the last issued insn. */
5874 static rtx last_scheduled_insn;
5876 /* The following variable value is the size of the DFA state. */
5878 static size_t dfa_state_size;
5880 /* The following variable value is a pointer to a DFA state used as
5881 a temporary variable. */
5883 static state_t temp_dfa_state = NULL;
5885 /* The following variable value is DFA state after issuing the last
5888 static state_t prev_cycle_state = NULL;
5890 /* The following array element values are TRUE if the corresponding
5891 insn requires stop bits to be added before it. */
5893 static char *stops_p;
5895 /* The following variable is used to set up the array mentioned above. */
5897 static int stop_before_p = 0;
5899 /* The following variable value is the length of the arrays `clocks' and
5902 static int clocks_length;
5904 /* The following array element values are cycles on which the
5905 corresponding insn will be issued. The array is used only for
5910 /* The following array element values are the numbers of cycles that
5911 should be added to improve insn scheduling for MM_insns for Itanium1. */
5913 static int *add_cycles;
5915 static rtx ia64_single_set (rtx);
5916 static void ia64_emit_insn_before (rtx, rtx);
5918 /* Map a bundle number to its pseudo-op. */
5921 get_bundle_name (int b)
5923 return bundle_name[b];
5927 /* Return the maximum number of instructions a cpu can issue. */
5930 ia64_issue_rate (void)
5935 /* Helper function - like single_set, but look inside COND_EXEC. */
5938 ia64_single_set (rtx insn)
5940 rtx x = PATTERN (insn), ret;
5941 if (GET_CODE (x) == COND_EXEC)
5942 x = COND_EXEC_CODE (x);
5943 if (GET_CODE (x) == SET)
5946 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5947 Although they are not classical single set, the second set is there just
5948 to protect it from moving past FP-relative stack accesses. */
5949 switch (recog_memoized (insn))
5951 case CODE_FOR_prologue_allocate_stack:
5952 case CODE_FOR_epilogue_deallocate_stack:
5953 ret = XVECEXP (x, 0, 0);
5957 ret = single_set_2 (insn, x);
5964 /* Adjust the cost of a scheduling dependency. Return the new cost of
5965 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5968 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5970 enum attr_itanium_class dep_class;
5971 enum attr_itanium_class insn_class;
5973 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5976 insn_class = ia64_safe_itanium_class (insn);
5977 dep_class = ia64_safe_itanium_class (dep_insn);
5978 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5979 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5985 /* Like emit_insn_before, but skip cycle_display notes.
5986 ??? When cycle display notes are implemented, update this. */
5989 ia64_emit_insn_before (rtx insn, rtx before)
5991 emit_insn_before (insn, before);
5994 /* The following function marks insns that produce addresses for load
5995 and store insns. Such insns will be placed into M slots because it
5996 decreases latency time for Itanium1 (see function
5997 `ia64_produce_address_p' and the DFA descriptions). */
6000 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6002 rtx insn, link, next, next_tail;
6004 /* Before reload, which_alternative is not set, which means that
6005 ia64_safe_itanium_class will produce wrong results for (at least)
6006 move instructions. */
6007 if (!reload_completed)
6010 next_tail = NEXT_INSN (tail);
6011 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6014 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6016 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6018 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6020 if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6022 next = XEXP (link, 0);
6023 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6024 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6025 && ia64_st_address_bypass_p (insn, next))
6027 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6028 || ia64_safe_itanium_class (next)
6029 == ITANIUM_CLASS_FLD)
6030 && ia64_ld_address_bypass_p (insn, next))
6033 insn->call = link != 0;
6037 /* We're beginning a new block. Initialize data structures as necessary. */
6040 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6041 int sched_verbose ATTRIBUTE_UNUSED,
6042 int max_ready ATTRIBUTE_UNUSED)
6044 #ifdef ENABLE_CHECKING
6047 if (reload_completed)
6048 for (insn = NEXT_INSN (current_sched_info->prev_head);
6049 insn != current_sched_info->next_tail;
6050 insn = NEXT_INSN (insn))
6051 gcc_assert (!SCHED_GROUP_P (insn));
6053 last_scheduled_insn = NULL_RTX;
6054 init_insn_group_barriers ();
6057 /* We are about to begin issuing insns for this clock cycle.
6058 Override the default sort algorithm to better slot instructions. */
6061 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6062 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6066 int n_ready = *pn_ready;
6067 rtx *e_ready = ready + n_ready;
6071 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6073 if (reorder_type == 0)
6075 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6077 for (insnp = ready; insnp < e_ready; insnp++)
6078 if (insnp < e_ready)
6081 enum attr_type t = ia64_safe_type (insn);
6082 if (t == TYPE_UNKNOWN)
6084 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6085 || asm_noperands (PATTERN (insn)) >= 0)
6087 rtx lowest = ready[n_asms];
6088 ready[n_asms] = insn;
6094 rtx highest = ready[n_ready - 1];
6095 ready[n_ready - 1] = insn;
6102 if (n_asms < n_ready)
6104 /* Some normal insns to process. Skip the asms. */
6108 else if (n_ready > 0)
6112 if (ia64_final_schedule)
6115 int nr_need_stop = 0;
6117 for (insnp = ready; insnp < e_ready; insnp++)
6118 if (safe_group_barrier_needed (*insnp))
6121 if (reorder_type == 1 && n_ready == nr_need_stop)
6123 if (reorder_type == 0)
6126 /* Move down everything that needs a stop bit, preserving
6128 while (insnp-- > ready + deleted)
6129 while (insnp >= ready + deleted)
6132 if (! safe_group_barrier_needed (insn))
6134 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6145 /* We are about to begin issuing insns for this clock cycle. Override
6146 the default sort algorithm to better slot instructions. */
6149 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6152 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6153 pn_ready, clock_var, 0);
6156 /* Like ia64_sched_reorder, but called after issuing each insn.
6157 Override the default sort algorithm to better slot instructions. */
6160 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6161 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6162 int *pn_ready, int clock_var)
6164 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6165 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6166 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6170 /* We are about to issue INSN. Return the number of insns left on the
6171 ready queue that can be issued this cycle. */
6174 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6175 int sched_verbose ATTRIBUTE_UNUSED,
6176 rtx insn ATTRIBUTE_UNUSED,
6177 int can_issue_more ATTRIBUTE_UNUSED)
6179 last_scheduled_insn = insn;
6180 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6181 if (reload_completed)
6183 int needed = group_barrier_needed (insn);
6185 gcc_assert (!needed);
6186 if (GET_CODE (insn) == CALL_INSN)
6187 init_insn_group_barriers ();
6188 stops_p [INSN_UID (insn)] = stop_before_p;
6194 /* We are choosing insn from the ready queue. Return nonzero if INSN
6198 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6200 gcc_assert (insn && INSN_P (insn));
6201 return (!reload_completed
6202 || !safe_group_barrier_needed (insn));
6205 /* The following variable value is a pseudo-insn used by the DFA insn
6206 scheduler to change the DFA state when the simulated clock is
6209 static rtx dfa_pre_cycle_insn;
6211 /* We are about to begin issuing INSN. Return nonzero if we cannot
6212 issue it on the given cycle CLOCK and return zero if we should not sort
6213 the ready queue on the next clock start. */
6216 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6217 int clock, int *sort_p)
6219 int setup_clocks_p = FALSE;
6221 gcc_assert (insn && INSN_P (insn));
6222 if ((reload_completed && safe_group_barrier_needed (insn))
6223 || (last_scheduled_insn
6224 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6225 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6226 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6228 init_insn_group_barriers ();
6229 if (verbose && dump)
6230 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6231 last_clock == clock ? " + cycle advance" : "");
6233 if (last_clock == clock)
6235 state_transition (curr_state, dfa_stop_insn);
6236 if (TARGET_EARLY_STOP_BITS)
6237 *sort_p = (last_scheduled_insn == NULL_RTX
6238 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6243 else if (reload_completed)
6244 setup_clocks_p = TRUE;
6245 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6246 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6247 state_reset (curr_state);
6250 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6251 state_transition (curr_state, dfa_stop_insn);
6252 state_transition (curr_state, dfa_pre_cycle_insn);
6253 state_transition (curr_state, NULL);
6256 else if (reload_completed)
6257 setup_clocks_p = TRUE;
6258 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6259 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6260 && asm_noperands (PATTERN (insn)) < 0)
6262 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6264 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6269 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6270 if (REG_NOTE_KIND (link) == 0)
6272 enum attr_itanium_class dep_class;
6273 rtx dep_insn = XEXP (link, 0);
6275 dep_class = ia64_safe_itanium_class (dep_insn);
6276 if ((dep_class == ITANIUM_CLASS_MMMUL
6277 || dep_class == ITANIUM_CLASS_MMSHF)
6278 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6280 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6281 d = last_clock - clocks [INSN_UID (dep_insn)];
6284 add_cycles [INSN_UID (insn)] = 3 - d;
6292 /* The following page contains abstract data `bundle states' which are
6293 used for bundling insns (inserting nops and template generation). */
6295 /* The following describes the state of insn bundling. */
6299 /* Unique bundle state number to identify them in the debugging
6302 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6303 /* number of nops before and after the insn */
6304 short before_nops_num, after_nops_num;
6305 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6307 int cost; /* cost of the state in cycles */
6308 int accumulated_insns_num; /* number of all previous insns including
6309 nops. L is considered as 2 insns */
6310 int branch_deviation; /* deviation of previous branches from 3rd slots */
6311 struct bundle_state *next; /* next state with the same insn_num */
6312 struct bundle_state *originator; /* originator (previous insn state) */
6313 /* All bundle states are in the following chain. */
6314 struct bundle_state *allocated_states_chain;
6315 /* The DFA State after issuing the insn and the nops. */
6319 /* The following maps an insn number to the corresponding bundle state. */
6321 static struct bundle_state **index_to_bundle_states;
6323 /* The unique number of next bundle state. */
6325 static int bundle_states_num;
6327 /* All allocated bundle states are in the following chain. */
6329 static struct bundle_state *allocated_bundle_states_chain;
6331 /* All allocated but not used bundle states are in the following
6334 static struct bundle_state *free_bundle_state_chain;
6337 /* The following function returns a free bundle state. */
6339 static struct bundle_state *
6340 get_free_bundle_state (void)
6342 struct bundle_state *result;
6344 if (free_bundle_state_chain != NULL)
6346 result = free_bundle_state_chain;
6347 free_bundle_state_chain = result->next;
6351 result = xmalloc (sizeof (struct bundle_state));
6352 result->dfa_state = xmalloc (dfa_state_size);
6353 result->allocated_states_chain = allocated_bundle_states_chain;
6354 allocated_bundle_states_chain = result;
6356 result->unique_num = bundle_states_num++;
6361 /* The following function frees the given bundle state. */
6364 free_bundle_state (struct bundle_state *state)
6366 state->next = free_bundle_state_chain;
6367 free_bundle_state_chain = state;
6370 /* Start work with abstract data `bundle states'. */
6373 initiate_bundle_states (void)
6375 bundle_states_num = 0;
6376 free_bundle_state_chain = NULL;
6377 allocated_bundle_states_chain = NULL;
6380 /* Finish work with abstract data `bundle states'. */
6383 finish_bundle_states (void)
6385 struct bundle_state *curr_state, *next_state;
6387 for (curr_state = allocated_bundle_states_chain;
6389 curr_state = next_state)
6391 next_state = curr_state->allocated_states_chain;
6392 free (curr_state->dfa_state);
6397 /* Hash table of the bundle states. The key is dfa_state and insn_num
6398 of the bundle states. */
6400 static htab_t bundle_state_table;
6402 /* The function returns hash of BUNDLE_STATE. */
6405 bundle_state_hash (const void *bundle_state)
6407 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6410 for (result = i = 0; i < dfa_state_size; i++)
6411 result += (((unsigned char *) state->dfa_state) [i]
6412 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6413 return result + state->insn_num;
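/* Each byte of the DFA state is folded in at a byte-dependent shift, and
   insn_num is added last, so two states that differ only in their position
   in the insn sequence still hash differently.  */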
6416 /* The function returns nonzero if the bundle state keys are equal. */
6419 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6421 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6422 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6424 return (state1->insn_num == state2->insn_num
6425 && memcmp (state1->dfa_state, state2->dfa_state,
6426 dfa_state_size) == 0);
6429 /* The function inserts the BUNDLE_STATE into the hash table. The
6430 function returns nonzero if the bundle has been inserted into the
6431 table. The table contains the best bundle state with given key. */
6434 insert_bundle_state (struct bundle_state *bundle_state)
6438 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6439 if (*entry_ptr == NULL)
6441 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6442 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6443 *entry_ptr = (void *) bundle_state;
6446 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6447 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6448 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6449 > bundle_state->accumulated_insns_num
6450 || (((struct bundle_state *)
6451 *entry_ptr)->accumulated_insns_num
6452 == bundle_state->accumulated_insns_num
6453 && ((struct bundle_state *)
6454 *entry_ptr)->branch_deviation
6455 > bundle_state->branch_deviation))))
6458 struct bundle_state temp;
6460 temp = *(struct bundle_state *) *entry_ptr;
6461 *(struct bundle_state *) *entry_ptr = *bundle_state;
6462 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6463 *bundle_state = temp;
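/* Note (added): the contents are swapped rather than relinking the new state
   so that the state already chained into `index_to_bundle_states' and into
   the hash slot keeps its identity; the rejected contents end up back in
   BUNDLE_STATE, which the caller then frees (see issue_nops_and_insn).  */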
6468 /* Start work with the hash table. */
6471 initiate_bundle_state_table (void)
6473 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6477 /* Finish work with the hash table. */
6480 finish_bundle_state_table (void)
6482 htab_delete (bundle_state_table);
6487 /* The following variable is an insn `nop' used to check bundle states
6488 with different numbers of inserted nops. */
6490 static rtx ia64_nop;
6492 /* The following function tries to issue NOPS_NUM nops for the current
6493 state without advancing the processor cycle. If it fails, the
6494 function returns FALSE and frees the current state. */
6497 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6501 for (i = 0; i < nops_num; i++)
6502 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6504 free_bundle_state (curr_state);
6510 /* The following function tries to issue INSN for the current
6511 state without advancing the processor cycle. If it fails, the
6512 function returns FALSE and frees the current state. */
6515 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6517 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6519 free_bundle_state (curr_state);
6525 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6526 starting with ORIGINATOR without advancing the processor cycle. If
6527 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6528 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6529 If it is successful, the function creates a new bundle state and
6530 inserts it into the hash table and into `index_to_bundle_states'. */
6533 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6534 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6536 struct bundle_state *curr_state;
6538 curr_state = get_free_bundle_state ();
6539 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6540 curr_state->insn = insn;
6541 curr_state->insn_num = originator->insn_num + 1;
6542 curr_state->cost = originator->cost;
6543 curr_state->originator = originator;
6544 curr_state->before_nops_num = before_nops_num;
6545 curr_state->after_nops_num = 0;
6546 curr_state->accumulated_insns_num
6547 = originator->accumulated_insns_num + before_nops_num;
6548 curr_state->branch_deviation = originator->branch_deviation;
6550 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6552 gcc_assert (GET_MODE (insn) != TImode);
6553 if (!try_issue_nops (curr_state, before_nops_num))
6555 if (!try_issue_insn (curr_state, insn))
6557 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6558 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6559 && curr_state->accumulated_insns_num % 3 != 0)
6561 free_bundle_state (curr_state);
6565 else if (GET_MODE (insn) != TImode)
6567 if (!try_issue_nops (curr_state, before_nops_num))
6569 if (!try_issue_insn (curr_state, insn))
6571 curr_state->accumulated_insns_num++;
6572 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6573 && asm_noperands (PATTERN (insn)) < 0);
6575 if (ia64_safe_type (insn) == TYPE_L)
6576 curr_state->accumulated_insns_num++;
6580 /* If this is an insn that must be first in a group, then don't allow
6581 nops to be emitted before it. Currently, alloc is the only such
6582 supported instruction. */
6583 /* ??? The bundling automatons should handle this for us, but they do
6584 not yet have support for the first_insn attribute. */
6585 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6587 free_bundle_state (curr_state);
6591 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6592 state_transition (curr_state->dfa_state, NULL);
6594 if (!try_issue_nops (curr_state, before_nops_num))
6596 if (!try_issue_insn (curr_state, insn))
6598 curr_state->accumulated_insns_num++;
6599 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6600 || asm_noperands (PATTERN (insn)) >= 0)
6602 /* Finish bundle containing asm insn. */
6603 curr_state->after_nops_num
6604 = 3 - curr_state->accumulated_insns_num % 3;
6605 curr_state->accumulated_insns_num
6606 += 3 - curr_state->accumulated_insns_num % 3;
6608 else if (ia64_safe_type (insn) == TYPE_L)
6609 curr_state->accumulated_insns_num++;
6611 if (ia64_safe_type (insn) == TYPE_B)
6612 curr_state->branch_deviation
6613 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
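/* Worked example (added): (accumulated_insns_num - 1) % 3 is the slot the
   just-issued branch occupies in its bundle, so a branch in slot 2 adds 0
   to branch_deviation, slot 1 adds 1, and slot 0 adds 2.  The best-state
   search below therefore prefers sequences whose B-type insns sit in the
   third slot.  */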
6614 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6616 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6619 struct bundle_state *curr_state1;
6620 struct bundle_state *allocated_states_chain;
6622 curr_state1 = get_free_bundle_state ();
6623 dfa_state = curr_state1->dfa_state;
6624 allocated_states_chain = curr_state1->allocated_states_chain;
6625 *curr_state1 = *curr_state;
6626 curr_state1->dfa_state = dfa_state;
6627 curr_state1->allocated_states_chain = allocated_states_chain;
6628 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6630 curr_state = curr_state1;
6632 if (!try_issue_nops (curr_state,
6633 3 - curr_state->accumulated_insns_num % 3))
6635 curr_state->after_nops_num
6636 = 3 - curr_state->accumulated_insns_num % 3;
6637 curr_state->accumulated_insns_num
6638 += 3 - curr_state->accumulated_insns_num % 3;
6640 if (!insert_bundle_state (curr_state))
6641 free_bundle_state (curr_state);
6645 /* The following function returns the position in the two-bundle window for the given STATE. */
6649 get_max_pos (state_t state)
6651 if (cpu_unit_reservation_p (state, pos_6))
6653 else if (cpu_unit_reservation_p (state, pos_5))
6655 else if (cpu_unit_reservation_p (state, pos_4))
6657 else if (cpu_unit_reservation_p (state, pos_3))
6659 else if (cpu_unit_reservation_p (state, pos_2))
6661 else if (cpu_unit_reservation_p (state, pos_1))
6667 /* The function returns the code of a possible template for the given
6668 position and state. The function should be called only with the two
6669 position values 3 or 6. We avoid generating F NOPs by putting
6670 templates containing F insns at the end of the template search,
6671 because of an undocumented anomaly in McKinley-derived cores which can
6672 cause stalls if an F-unit insn (including a NOP) is issued within a
6673 six-cycle window after reading certain application registers (such
6674 as ar.bsp). Furthermore, power considerations also argue against
6675 the use of F-unit instructions unless they're really needed. */
6678 get_template (state_t state, int pos)
6683 if (cpu_unit_reservation_p (state, _0mmi_))
6685 else if (cpu_unit_reservation_p (state, _0mii_))
6687 else if (cpu_unit_reservation_p (state, _0mmb_))
6689 else if (cpu_unit_reservation_p (state, _0mib_))
6691 else if (cpu_unit_reservation_p (state, _0mbb_))
6693 else if (cpu_unit_reservation_p (state, _0bbb_))
6695 else if (cpu_unit_reservation_p (state, _0mmf_))
6697 else if (cpu_unit_reservation_p (state, _0mfi_))
6699 else if (cpu_unit_reservation_p (state, _0mfb_))
6701 else if (cpu_unit_reservation_p (state, _0mlx_))
6706 if (cpu_unit_reservation_p (state, _1mmi_))
6708 else if (cpu_unit_reservation_p (state, _1mii_))
6710 else if (cpu_unit_reservation_p (state, _1mmb_))
6712 else if (cpu_unit_reservation_p (state, _1mib_))
6714 else if (cpu_unit_reservation_p (state, _1mbb_))
6716 else if (cpu_unit_reservation_p (state, _1bbb_))
6718 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6720 else if (cpu_unit_reservation_p (state, _1mfi_))
6722 else if (cpu_unit_reservation_p (state, _1mfb_))
6724 else if (cpu_unit_reservation_p (state, _1mlx_))
6733 /* The following function returns the first insn important for insn
6734 bundling at or after INSN and before TAIL. */
6737 get_next_important_insn (rtx insn, rtx tail)
6739 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6741 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6742 && GET_CODE (PATTERN (insn)) != USE
6743 && GET_CODE (PATTERN (insn)) != CLOBBER)
6748 /* The following function does insn bundling. Bundling means
6749 inserting templates and nop insns to fit insn groups into permitted
6750 templates. Instruction scheduling uses an NDFA (non-deterministic
6751 finite automaton) encoding information about the templates and the
6752 inserted nops. The nondeterminism of the automaton permits following
6753 all possible insn sequences very quickly.
6755 Unfortunately it is not possible to get information about the inserted
6756 nop insns and the used templates from the automaton states. The
6757 automaton only says that we can issue an insn, possibly inserting
6758 some nops before it and using some template. Therefore insn
6759 bundling in this function is implemented by using a DFA
6760 (deterministic finite automaton). We follow all possible insn
6761 sequences by inserting 0-2 nops (that is what the NDFA describes for
6762 insn scheduling) before/after each insn being bundled. We know the
6763 start of a simulated processor cycle from insn scheduling (an insn
6764 starting a new cycle has TImode).
6766 A simple implementation of insn bundling would create an enormous
6767 number of possible insn sequences satisfying the information about new
6768 cycle ticks taken from the insn scheduling. To make the algorithm
6769 practical we use dynamic programming. Each decision (about
6770 inserting nops and implicitly about previous decisions) is described
6771 by the structure bundle_state (see above). If we generate the same
6772 bundle state (the key is the automaton state after issuing the insns and
6773 nops for it), we reuse the already generated one. As a consequence we
6774 reject some decisions which cannot improve the solution and
6775 reduce the memory used by the algorithm.
6777 When we reach the end of the EBB (extended basic block), we choose the
6778 best sequence and then, moving back through the EBB, insert templates for
6779 the best alternative. The templates are taken by querying the
6780 automaton state for each insn in the chosen bundle states.
6782 So the algorithm makes two (forward and backward) passes through the
6783 EBB. There is an additional forward pass through the EBB for the Itanium1
6784 processor. This pass inserts more nops to make the dependency between
6785 a producer insn and MMMUL/MMSHF insns at least 4 cycles long. */
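/* A minimal sketch (added, not part of GCC) of the dynamic programming step
   described above.  It reuses the helpers defined earlier in this file; the
   function name dp_try_insn is hypothetical and the sketch is kept under
   "#if 0" so it is never compiled.  Each partial decision is keyed by
   (automaton state, number of issued insns), and of two states with the same
   key only the better one survives, exactly as insert_bundle_state arranges.  */
#if 0
static void
dp_try_insn (struct bundle_state *prev, rtx insn)
{
  int nops;

  /* Try issuing INSN with 0, 1 or 2 nops in front of it -- the only
     choices the NDFA used for scheduling allows.  */
  for (nops = 0; nops <= 2; nops++)
    {
      struct bundle_state *s = get_free_bundle_state ();

      memcpy (s->dfa_state, prev->dfa_state, dfa_state_size);
      s->insn_num = prev->insn_num + 1;
      s->originator = prev;
      /* Cost, nop counts and branch deviation are omitted for brevity.  */
      if (!try_issue_nops (s, nops) || !try_issue_insn (s, insn))
	continue;	/* The failed attempt has already freed S.  */
      /* Keep only the best state for the key (dfa_state, insn_num).  */
      if (!insert_bundle_state (s))
	free_bundle_state (s);
    }
}
#endif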
6788 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6790 struct bundle_state *curr_state, *next_state, *best_state;
6791 rtx insn, next_insn;
6793 int i, bundle_end_p, only_bundle_end_p, asm_p;
6794 int pos = 0, max_pos, template0, template1;
6797 enum attr_type type;
6800 /* Count insns in the EBB. */
6801 for (insn = NEXT_INSN (prev_head_insn);
6802 insn && insn != tail;
6803 insn = NEXT_INSN (insn))
6809 dfa_clean_insn_cache ();
6810 initiate_bundle_state_table ();
6811 index_to_bundle_states = xmalloc ((insn_num + 2)
6812 * sizeof (struct bundle_state *));
6813 /* First (forward) pass -- generation of bundle states. */
6814 curr_state = get_free_bundle_state ();
6815 curr_state->insn = NULL;
6816 curr_state->before_nops_num = 0;
6817 curr_state->after_nops_num = 0;
6818 curr_state->insn_num = 0;
6819 curr_state->cost = 0;
6820 curr_state->accumulated_insns_num = 0;
6821 curr_state->branch_deviation = 0;
6822 curr_state->next = NULL;
6823 curr_state->originator = NULL;
6824 state_reset (curr_state->dfa_state);
6825 index_to_bundle_states [0] = curr_state;
6827 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6828 for (insn = NEXT_INSN (prev_head_insn);
6830 insn = NEXT_INSN (insn))
6832 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6833 || GET_CODE (PATTERN (insn)) == USE
6834 || GET_CODE (PATTERN (insn)) == CLOBBER)
6835 && GET_MODE (insn) == TImode)
6837 PUT_MODE (insn, VOIDmode);
6838 for (next_insn = NEXT_INSN (insn);
6840 next_insn = NEXT_INSN (next_insn))
6841 if (INSN_P (next_insn)
6842 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6843 && GET_CODE (PATTERN (next_insn)) != USE
6844 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6846 PUT_MODE (next_insn, TImode);
6850 /* Forward pass: generation of bundle states. */
6851 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6855 gcc_assert (INSN_P (insn)
6856 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6857 && GET_CODE (PATTERN (insn)) != USE
6858 && GET_CODE (PATTERN (insn)) != CLOBBER);
6859 type = ia64_safe_type (insn);
6860 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6862 index_to_bundle_states [insn_num] = NULL;
6863 for (curr_state = index_to_bundle_states [insn_num - 1];
6865 curr_state = next_state)
6867 pos = curr_state->accumulated_insns_num % 3;
6868 next_state = curr_state->next;
6869 /* We must fill up the current bundle in order to start a
6870 subsequent asm insn in a new bundle. An asm insn is always
6871 placed in a separate bundle. */
6873 = (next_insn != NULL_RTX
6874 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6875 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6876 /* We may fill up the current bundle if it is the cycle end
6877 without a group barrier. */
6879 = (only_bundle_end_p || next_insn == NULL_RTX
6880 || (GET_MODE (next_insn) == TImode
6881 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6882 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6884 /* We need to insert 2 nops for cases like M_MII. To
6885 guarantee issuing all insns on the same cycle for
6886 Itanium 1, we need to issue 2 nops after the first M
6887 insn (MnnMII where n is a nop insn). */
6888 || ((type == TYPE_M || type == TYPE_A)
6889 && ia64_tune == PROCESSOR_ITANIUM
6890 && !bundle_end_p && pos == 1))
6891 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6893 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6895 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6898 gcc_assert (index_to_bundle_states [insn_num]);
6899 for (curr_state = index_to_bundle_states [insn_num];
6901 curr_state = curr_state->next)
6902 if (verbose >= 2 && dump)
6904 /* This structure is taken from the generated code of the
6905 pipeline hazard recognizer (see file insn-attrtab.c).
6906 Please don't forget to change the structure if a new
6907 automaton is added to the .md file. */
6910 unsigned short one_automaton_state;
6911 unsigned short oneb_automaton_state;
6912 unsigned short two_automaton_state;
6913 unsigned short twob_automaton_state;
6918 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6919 curr_state->unique_num,
6920 (curr_state->originator == NULL
6921 ? -1 : curr_state->originator->unique_num),
6923 curr_state->before_nops_num, curr_state->after_nops_num,
6924 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6925 (ia64_tune == PROCESSOR_ITANIUM
6926 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6927 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6932 /* We should find a solution because the 2nd insn scheduling has found one. */
6934 gcc_assert (index_to_bundle_states [insn_num]);
6935 /* Find a state corresponding to the best insn sequence. */
6937 for (curr_state = index_to_bundle_states [insn_num];
6939 curr_state = curr_state->next)
6940 /* We are only looking at the states with a fully filled-up last
6941 bundle. First we prefer insn sequences with minimal cost,
6942 then those with the fewest inserted nops, and finally those with
6943 branch insns placed in the 3rd slots. */
6944 if (curr_state->accumulated_insns_num % 3 == 0
6945 && (best_state == NULL || best_state->cost > curr_state->cost
6946 || (best_state->cost == curr_state->cost
6947 && (curr_state->accumulated_insns_num
6948 < best_state->accumulated_insns_num
6949 || (curr_state->accumulated_insns_num
6950 == best_state->accumulated_insns_num
6951 && curr_state->branch_deviation
6952 < best_state->branch_deviation)))))
6953 best_state = curr_state;
6954 /* Second (backward) pass: adding nops and templates. */
6955 insn_num = best_state->before_nops_num;
6956 template0 = template1 = -1;
6957 for (curr_state = best_state;
6958 curr_state->originator != NULL;
6959 curr_state = curr_state->originator)
6961 insn = curr_state->insn;
6962 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6963 || asm_noperands (PATTERN (insn)) >= 0);
6965 if (verbose >= 2 && dump)
6969 unsigned short one_automaton_state;
6970 unsigned short oneb_automaton_state;
6971 unsigned short two_automaton_state;
6972 unsigned short twob_automaton_state;
6977 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6978 curr_state->unique_num,
6979 (curr_state->originator == NULL
6980 ? -1 : curr_state->originator->unique_num),
6982 curr_state->before_nops_num, curr_state->after_nops_num,
6983 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6984 (ia64_tune == PROCESSOR_ITANIUM
6985 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6986 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6989 /* Find the position in the current bundle window. The window can
6990 contain at most two bundles. A two-bundle window means that
6991 the processor will make two bundle rotations. */
6992 max_pos = get_max_pos (curr_state->dfa_state);
6994 /* The following (negative template number) means that the
6995 processor did one bundle rotation. */
6996 || (max_pos == 3 && template0 < 0))
6998 /* We are at the end of the window -- find template(s) for
7002 template0 = get_template (curr_state->dfa_state, 3);
7005 template1 = get_template (curr_state->dfa_state, 3);
7006 template0 = get_template (curr_state->dfa_state, 6);
7009 if (max_pos > 3 && template1 < 0)
7010 /* This may happen when we have a stop inside a bundle. */
7012 gcc_assert (pos <= 3);
7013 template1 = get_template (curr_state->dfa_state, 3);
7017 /* Emit nops after the current insn. */
7018 for (i = 0; i < curr_state->after_nops_num; i++)
7021 emit_insn_after (nop, insn);
7023 gcc_assert (pos >= 0);
7026 /* We are at the start of a bundle: emit the template
7027 (it should be defined). */
7028 gcc_assert (template0 >= 0);
7029 b = gen_bundle_selector (GEN_INT (template0));
7030 ia64_emit_insn_before (b, nop);
7031 /* If we have a two-bundle window, we make one bundle
7032 rotation. Otherwise template0 will be undefined
7033 (a negative value). */
7034 template0 = template1;
7038 /* Move the position backward in the window. A group barrier has
7039 no slot. An asm insn takes up a whole bundle. */
7040 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7041 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7042 && asm_noperands (PATTERN (insn)) < 0)
7044 /* Long insn takes 2 slots. */
7045 if (ia64_safe_type (insn) == TYPE_L)
7047 gcc_assert (pos >= 0);
7049 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7050 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7051 && asm_noperands (PATTERN (insn)) < 0)
7053 /* The current insn is at the bundle start: emit the template. */
7055 gcc_assert (template0 >= 0);
7056 b = gen_bundle_selector (GEN_INT (template0));
7057 ia64_emit_insn_before (b, insn);
7058 b = PREV_INSN (insn);
7060 /* See comment above in analogous place for emitting nops
7062 template0 = template1;
7065 /* Emit nops before the current insn. */
7066 for (i = 0; i < curr_state->before_nops_num; i++)
7069 ia64_emit_insn_before (nop, insn);
7070 nop = PREV_INSN (insn);
7073 gcc_assert (pos >= 0);
7076 /* See comment above in analogous place for emitting nops
7078 gcc_assert (template0 >= 0);
7079 b = gen_bundle_selector (GEN_INT (template0));
7080 ia64_emit_insn_before (b, insn);
7081 b = PREV_INSN (insn);
7083 template0 = template1;
7088 if (ia64_tune == PROCESSOR_ITANIUM)
7089 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7090 Itanium1 has a strange design: if the distance between an insn
7091 and a dependent MM-insn is less than 4, then we have an additional
7092 6-cycle stall. So we make the distance equal to 4 cycles if it is less. */
7094 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7098 gcc_assert (INSN_P (insn)
7099 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7100 && GET_CODE (PATTERN (insn)) != USE
7101 && GET_CODE (PATTERN (insn)) != CLOBBER);
7102 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7103 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7104 /* We found a MM-insn which needs additional cycles. */
7110 /* Now we are searching for the template of the bundle in
7111 which the MM-insn is placed and the position of the
7112 insn in the bundle (0, 1, 2). We also check
7113 whether there is a stop before the insn. */
7114 last = prev_active_insn (insn);
7115 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7117 last = prev_active_insn (last);
7119 for (;; last = prev_active_insn (last))
7120 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7122 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7124 /* The insn is in an MLX bundle. Change the template
7125 to MFI because we will add nops before the
7126 insn. This simplifies the subsequent code a lot. */
7128 = gen_bundle_selector (const2_rtx); /* -> MFI */
7131 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7132 && (ia64_safe_itanium_class (last)
7133 != ITANIUM_CLASS_IGNORE))
7135 /* Some checks of correctness: the stop is not at the
7136 bundle start, there are no more than 3 insns in the bundle,
7137 and the MM-insn is not at the start of a bundle with template MLX. */
7139 gcc_assert ((!pred_stop_p || n)
7141 && (template0 != 9 || !n));
7142 /* Put nops after the insn in the bundle. */
7143 for (j = 3 - n; j > 0; j --)
7144 ia64_emit_insn_before (gen_nop (), insn);
7145 /* This takes into account that we will add N more nops
7146 before the insn later -- please see the code below. */
7147 add_cycles [INSN_UID (insn)]--;
7148 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7149 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7152 add_cycles [INSN_UID (insn)]--;
7153 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7155 /* Insert "MII;" template. */
7156 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7158 ia64_emit_insn_before (gen_nop (), insn);
7159 ia64_emit_insn_before (gen_nop (), insn);
7162 /* To decrease code size, we use the "MI;I;" template. */
7164 ia64_emit_insn_before
7165 (gen_insn_group_barrier (GEN_INT (3)), insn);
7168 ia64_emit_insn_before (gen_nop (), insn);
7169 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7172 /* Put the MM-insn in the same slot of a bundle with the
7173 same template as the original one. */
7174 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7176 /* To put the insn in the same slot, add the necessary number of nops. */
7178 for (j = n; j > 0; j --)
7179 ia64_emit_insn_before (gen_nop (), insn);
7180 /* Put the stop if the original bundle had it. */
7182 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7186 free (index_to_bundle_states);
7187 finish_bundle_state_table ();
7189 dfa_clean_insn_cache ();
7192 /* The following function is called at the end of scheduling BB or
7193 EBB. After reload, it inserts stop bits and does insn bundling. */
7196 ia64_sched_finish (FILE *dump, int sched_verbose)
7199 fprintf (dump, "// Finishing schedule.\n");
7200 if (!reload_completed)
7202 if (reload_completed)
7204 final_emit_insn_group_barriers (dump);
7205 bundling (dump, sched_verbose, current_sched_info->prev_head,
7206 current_sched_info->next_tail);
7207 if (sched_verbose && dump)
7208 fprintf (dump, "// finishing %d-%d\n",
7209 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7210 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7216 /* The following function inserts stop bits in scheduled BB or EBB. */
7219 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7222 int need_barrier_p = 0;
7223 rtx prev_insn = NULL_RTX;
7225 init_insn_group_barriers ();
7227 for (insn = NEXT_INSN (current_sched_info->prev_head);
7228 insn != current_sched_info->next_tail;
7229 insn = NEXT_INSN (insn))
7231 if (GET_CODE (insn) == BARRIER)
7233 rtx last = prev_active_insn (insn);
7237 if (GET_CODE (last) == JUMP_INSN
7238 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7239 last = prev_active_insn (last);
7240 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7241 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7243 init_insn_group_barriers ();
7245 prev_insn = NULL_RTX;
7247 else if (INSN_P (insn))
7249 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7251 init_insn_group_barriers ();
7253 prev_insn = NULL_RTX;
7255 else if (need_barrier_p || group_barrier_needed (insn))
7257 if (TARGET_EARLY_STOP_BITS)
7262 last != current_sched_info->prev_head;
7263 last = PREV_INSN (last))
7264 if (INSN_P (last) && GET_MODE (last) == TImode
7265 && stops_p [INSN_UID (last)])
7267 if (last == current_sched_info->prev_head)
7269 last = prev_active_insn (last);
7271 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7272 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7274 init_insn_group_barriers ();
7275 for (last = NEXT_INSN (last);
7277 last = NEXT_INSN (last))
7279 group_barrier_needed (last);
7283 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7285 init_insn_group_barriers ();
7287 group_barrier_needed (insn);
7288 prev_insn = NULL_RTX;
7290 else if (recog_memoized (insn) >= 0)
7292 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7293 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7294 || asm_noperands (PATTERN (insn)) >= 0);
7301 /* If the following function returns TRUE, we will use the DFA
7305 ia64_first_cycle_multipass_dfa_lookahead (void)
7307 return (reload_completed ? 6 : 4);
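/* Note (added): the multipass lookahead window is therefore 4 insns during
   the first scheduling pass and 6 insns during the second, post-reload pass
   that precedes bundling.  */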
7310 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7313 ia64_init_dfa_pre_cycle_insn (void)
7315 if (temp_dfa_state == NULL)
7317 dfa_state_size = state_size ();
7318 temp_dfa_state = xmalloc (dfa_state_size);
7319 prev_cycle_state = xmalloc (dfa_state_size);
7321 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7322 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7323 recog_memoized (dfa_pre_cycle_insn);
7324 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7325 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7326 recog_memoized (dfa_stop_insn);
7329 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7330 used by the DFA insn scheduler. */
7333 ia64_dfa_pre_cycle_insn (void)
7335 return dfa_pre_cycle_insn;
7338 /* The following function returns TRUE if PRODUCER (of type ilog or
7339 ld) produces an address for CONSUMER (of type st or stf). */
7342 ia64_st_address_bypass_p (rtx producer, rtx consumer)
7346 gcc_assert (producer && consumer);
7347 dest = ia64_single_set (producer);
7349 reg = SET_DEST (dest);
7351 if (GET_CODE (reg) == SUBREG)
7352 reg = SUBREG_REG (reg);
7353 gcc_assert (GET_CODE (reg) == REG);
7355 dest = ia64_single_set (consumer);
7357 mem = SET_DEST (dest);
7358 gcc_assert (mem && GET_CODE (mem) == MEM);
7359 return reg_mentioned_p (reg, mem);
7362 /* The following function returns TRUE if PRODUCER (of type ilog or
7363 ld) produces an address for CONSUMER (of type ld or fld). */
7366 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7368 rtx dest, src, reg, mem;
7370 gcc_assert (producer && consumer);
7371 dest = ia64_single_set (producer);
7373 reg = SET_DEST (dest);
7375 if (GET_CODE (reg) == SUBREG)
7376 reg = SUBREG_REG (reg);
7377 gcc_assert (GET_CODE (reg) == REG);
7379 src = ia64_single_set (consumer);
7381 mem = SET_SRC (src);
7383 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7384 mem = XVECEXP (mem, 0, 0);
7385 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7386 mem = XEXP (mem, 0);
7388 /* Note that LO_SUM is used for GOT loads. */
7389 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7391 return reg_mentioned_p (reg, mem);
7394 /* The following function returns TRUE if INSN produces an address for a
7395 load/store insn. We place such insns into an M slot because it
7396 decreases their latency. */
7399 ia64_produce_address_p (rtx insn)
7405 /* Emit pseudo-ops for the assembler to describe predicate relations.
7406 At present this assumes that we only consider predicate pairs to
7407 be mutex, and that the assembler can deduce proper values from
7408 straight-line code. */
7411 emit_predicate_relation_info (void)
7415 FOR_EACH_BB_REVERSE (bb)
7418 rtx head = BB_HEAD (bb);
7420 /* We only need such notes at code labels. */
7421 if (GET_CODE (head) != CODE_LABEL)
7423 if (GET_CODE (NEXT_INSN (head)) == NOTE
7424 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7425 head = NEXT_INSN (head);
7427 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7428 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7430 rtx p = gen_rtx_REG (BImode, r);
7431 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7432 if (head == BB_END (bb))
7438 /* Look for conditional calls that do not return, and protect predicate
7439 relations around them. Otherwise the assembler will assume the call
7440 returns, and complain about uses of call-clobbered predicates after the call. */
7442 FOR_EACH_BB_REVERSE (bb)
7444 rtx insn = BB_HEAD (bb);
7448 if (GET_CODE (insn) == CALL_INSN
7449 && GET_CODE (PATTERN (insn)) == COND_EXEC
7450 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7452 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7453 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7454 if (BB_HEAD (bb) == insn)
7456 if (BB_END (bb) == insn)
7460 if (insn == BB_END (bb))
7462 insn = NEXT_INSN (insn);
7467 /* Perform machine dependent operations on the rtl chain INSNS. */
7472 /* We are freeing block_for_insn in the toplev to keep compatibility
7473 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7474 compute_bb_for_insn ();
7476 /* If optimizing, we'll have split before scheduling. */
7478 split_all_insns (0);
7480 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7481 non-optimizing bootstrap. */
7482 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7484 if (ia64_flag_schedule_insns2)
7486 timevar_push (TV_SCHED2);
7487 ia64_final_schedule = 1;
7489 initiate_bundle_states ();
7490 ia64_nop = make_insn_raw (gen_nop ());
7491 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7492 recog_memoized (ia64_nop);
7493 clocks_length = get_max_uid () + 1;
7494 stops_p = xcalloc (1, clocks_length);
7495 if (ia64_tune == PROCESSOR_ITANIUM)
7497 clocks = xcalloc (clocks_length, sizeof (int));
7498 add_cycles = xcalloc (clocks_length, sizeof (int));
7500 if (ia64_tune == PROCESSOR_ITANIUM2)
7502 pos_1 = get_cpu_unit_code ("2_1");
7503 pos_2 = get_cpu_unit_code ("2_2");
7504 pos_3 = get_cpu_unit_code ("2_3");
7505 pos_4 = get_cpu_unit_code ("2_4");
7506 pos_5 = get_cpu_unit_code ("2_5");
7507 pos_6 = get_cpu_unit_code ("2_6");
7508 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7509 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7510 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7511 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7512 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7513 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7514 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7515 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7516 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7517 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7518 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7519 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7520 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7521 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7522 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7523 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7524 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7525 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7526 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7527 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7531 pos_1 = get_cpu_unit_code ("1_1");
7532 pos_2 = get_cpu_unit_code ("1_2");
7533 pos_3 = get_cpu_unit_code ("1_3");
7534 pos_4 = get_cpu_unit_code ("1_4");
7535 pos_5 = get_cpu_unit_code ("1_5");
7536 pos_6 = get_cpu_unit_code ("1_6");
7537 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7538 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7539 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7540 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7541 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7542 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7543 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7544 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7545 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7546 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7547 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7548 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7549 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7550 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7551 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7552 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7553 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7554 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7555 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7556 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7558 schedule_ebbs (dump_file);
7559 finish_bundle_states ();
7560 if (ia64_tune == PROCESSOR_ITANIUM)
7566 emit_insn_group_barriers (dump_file);
7568 ia64_final_schedule = 0;
7569 timevar_pop (TV_SCHED2);
7572 emit_all_insn_group_barriers (dump_file);
7574 /* A call must not be the last instruction in a function, so that the
7575 return address is still within the function, so that unwinding works
7576 properly. Note that IA-64 differs from dwarf2 on this point. */
7577 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7582 insn = get_last_insn ();
7583 if (! INSN_P (insn))
7584 insn = prev_active_insn (insn);
7585 /* Skip over insns that expand to nothing. */
7586 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7588 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7589 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7591 insn = prev_active_insn (insn);
7593 if (GET_CODE (insn) == CALL_INSN)
7596 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7597 emit_insn (gen_break_f ());
7598 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7602 emit_predicate_relation_info ();
7604 if (ia64_flag_var_tracking)
7606 timevar_push (TV_VAR_TRACKING);
7607 variable_tracking_main ();
7608 timevar_pop (TV_VAR_TRACKING);
7612 /* Return true if REGNO is used by the epilogue. */
7615 ia64_epilogue_uses (int regno)
7620 /* With a call to a function in another module, we will write a new
7621 value to "gp". After returning from such a call, we need to make
7622 sure the function restores the original gp-value, even if the
7623 function itself does not use the gp anymore. */
7624 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7626 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7627 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7628 /* For functions defined with the syscall_linkage attribute, all
7629 input registers are marked as live at all function exits. This
7630 prevents the register allocator from using the input registers,
7631 which in turn makes it possible to restart a system call after
7632 an interrupt without having to save/restore the input registers.
7633 This also prevents kernel data from leaking to application code. */
7634 return lookup_attribute ("syscall_linkage",
7635 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7638 /* Conditional return patterns can't represent the use of `b0' as
7639 the return address, so we force the value live this way. */
7643 /* Likewise for ar.pfs, which is used by br.ret. */
7651 /* Return true if REGNO is used by the frame unwinder. */
7654 ia64_eh_uses (int regno)
7656 if (! reload_completed)
7659 if (current_frame_info.reg_save_b0
7660 && regno == current_frame_info.reg_save_b0)
7662 if (current_frame_info.reg_save_pr
7663 && regno == current_frame_info.reg_save_pr)
7665 if (current_frame_info.reg_save_ar_pfs
7666 && regno == current_frame_info.reg_save_ar_pfs)
7668 if (current_frame_info.reg_save_ar_unat
7669 && regno == current_frame_info.reg_save_ar_unat)
7671 if (current_frame_info.reg_save_ar_lc
7672 && regno == current_frame_info.reg_save_ar_lc)
7678 /* Return true if this goes in small data/bss. */
7680 /* ??? We could also support our own long data here, generating movl/add/ld8
7681 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7682 code faster because there is one less load. This also includes incomplete
7683 types which can't go in sdata/sbss. */
7686 ia64_in_small_data_p (tree exp)
7688 if (TARGET_NO_SDATA)
7691 /* We want to merge strings, so we never consider them small data. */
7692 if (TREE_CODE (exp) == STRING_CST)
7695 /* Functions are never small data. */
7696 if (TREE_CODE (exp) == FUNCTION_DECL)
7699 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7701 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7703 if (strcmp (section, ".sdata") == 0
7704 || strncmp (section, ".sdata.", 7) == 0
7705 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
7706 || strcmp (section, ".sbss") == 0
7707 || strncmp (section, ".sbss.", 6) == 0
7708 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
7713 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7715 /* If this is an incomplete type with size 0, then we can't put it
7716 in sdata because it might be too big when completed. */
7717 if (size > 0 && size <= ia64_section_threshold)
7724 /* Output assembly directives for prologue regions. */
7726 /* The current basic block number. */
7728 static bool last_block;
7730 /* True if we need a copy_state command at the start of the next block. */
7732 static bool need_copy_state;
7734 /* The function emits unwind directives for the start of an epilogue. */
7737 process_epilogue (void)
7739 /* If this isn't the last block of the function, then we need to label the
7740 current state, and copy it back in at the start of the next block. */
7744 fprintf (asm_out_file, "\t.label_state %d\n",
7745 ++cfun->machine->state_num);
7746 need_copy_state = true;
7749 fprintf (asm_out_file, "\t.restore sp\n");
7752 /* This function processes a SET pattern looking for specific patterns
7753 which result in emitting an assembly directive required for unwinding. */
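/* Illustrative example (added, not from the original source): for a prologue
   that saves b0 into, say, r35, the frame-related move "r35 = b0" reaches the
   BR_REG (0) case below and produces the directive "\t.save rp, r35" (via
   ia64_dbx_register_number), telling the unwinder where the return pointer
   lives.  The register r35 here is only an example.  */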
7756 process_set (FILE *asm_out_file, rtx pat)
7758 rtx src = SET_SRC (pat);
7759 rtx dest = SET_DEST (pat);
7760 int src_regno, dest_regno;
7762 /* Look for the ALLOC insn. */
7763 if (GET_CODE (src) == UNSPEC_VOLATILE
7764 && XINT (src, 1) == UNSPECV_ALLOC
7765 && GET_CODE (dest) == REG)
7767 dest_regno = REGNO (dest);
7769 /* If this is the final destination for ar.pfs, then this must
7770 be the alloc in the prologue. */
7771 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7772 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7773 ia64_dbx_register_number (dest_regno));
7776 /* This must be an alloc before a sibcall. We must drop the
7777 old frame info. The easiest way to drop the old frame
7778 info is to ensure we had a ".restore sp" directive
7779 followed by a new prologue. If the procedure doesn't
7780 have a memory-stack frame, we'll issue a dummy ".restore sp" now. */
7782 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7783 /* If we haven't done process_epilogue () yet, do it now. */
7784 process_epilogue ();
7785 fprintf (asm_out_file, "\t.prologue\n");
7790 /* Look for SP = .... */
7791 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7793 if (GET_CODE (src) == PLUS)
7795 rtx op0 = XEXP (src, 0);
7796 rtx op1 = XEXP (src, 1);
7798 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
7800 if (INTVAL (op1) < 0)
7801 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7804 process_epilogue ();
7808 gcc_assert (GET_CODE (src) == REG
7809 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
7810 process_epilogue ();
7816 /* Register move we need to look at. */
7817 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7819 src_regno = REGNO (src);
7820 dest_regno = REGNO (dest);
7825 /* Saving return address pointer. */
7826 gcc_assert (dest_regno == current_frame_info.reg_save_b0);
7827 fprintf (asm_out_file, "\t.save rp, r%d\n",
7828 ia64_dbx_register_number (dest_regno));
7832 gcc_assert (dest_regno == current_frame_info.reg_save_pr);
7833 fprintf (asm_out_file, "\t.save pr, r%d\n",
7834 ia64_dbx_register_number (dest_regno));
7837 case AR_UNAT_REGNUM:
7838 gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
7839 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7840 ia64_dbx_register_number (dest_regno));
7844 gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
7845 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7846 ia64_dbx_register_number (dest_regno));
7849 case STACK_POINTER_REGNUM:
7850 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
7851 && frame_pointer_needed);
7852 fprintf (asm_out_file, "\t.vframe r%d\n",
7853 ia64_dbx_register_number (dest_regno));
7857 /* Everything else should indicate being stored to memory. */
7862 /* Memory store we need to look at. */
7863 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7869 if (GET_CODE (XEXP (dest, 0)) == REG)
7871 base = XEXP (dest, 0);
7876 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
7877 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
7878 base = XEXP (XEXP (dest, 0), 0);
7879 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7882 if (base == hard_frame_pointer_rtx)
7884 saveop = ".savepsp";
7889 gcc_assert (base == stack_pointer_rtx);
7893 src_regno = REGNO (src);
7897 gcc_assert (!current_frame_info.reg_save_b0);
7898 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7902 gcc_assert (!current_frame_info.reg_save_pr);
7903 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7907 gcc_assert (!current_frame_info.reg_save_ar_lc);
7908 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7912 gcc_assert (!current_frame_info.reg_save_ar_pfs);
7913 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7916 case AR_UNAT_REGNUM:
7917 gcc_assert (!current_frame_info.reg_save_ar_unat);
7918 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7925 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7926 1 << (src_regno - GR_REG (4)));
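/* Worked example (added): spilling r5 gives 1 << (5 - 4) == 0x2, so the
   directive emitted is "\t.save.g 0x2", i.e. bit 1 of the preserved
   general-register mask.  */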
7934 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7935 1 << (src_regno - BR_REG (1)));
7942 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7943 1 << (src_regno - FR_REG (2)));
7946 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7947 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7948 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7949 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7950 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7951 1 << (src_regno - FR_REG (12)));
7963 /* This function looks at a single insn and emits any directives
7964 required to unwind this insn. */
7966 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7968 if (flag_unwind_tables
7969 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7973 if (GET_CODE (insn) == NOTE
7974 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7976 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7978 /* Restore unwind state from immediately before the epilogue. */
7979 if (need_copy_state)
7981 fprintf (asm_out_file, "\t.body\n");
7982 fprintf (asm_out_file, "\t.copy_state %d\n",
7983 cfun->machine->state_num);
7984 need_copy_state = false;
7988 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7991 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7993 pat = XEXP (pat, 0);
7995 pat = PATTERN (insn);
7997 switch (GET_CODE (pat))
8000 process_set (asm_out_file, pat);
8006 int limit = XVECLEN (pat, 0);
8007 for (par_index = 0; par_index < limit; par_index++)
8009 rtx x = XVECEXP (pat, 0, par_index);
8010 if (GET_CODE (x) == SET)
8011 process_set (asm_out_file, x);
8026 IA64_BUILTIN_FLUSHRS
8030 ia64_init_builtins (void)
8035 /* The __fpreg type. */
8036 fpreg_type = make_node (REAL_TYPE);
8037 /* ??? The back end should know to load/save __fpreg variables using
8038 the ldf.fill and stf.spill instructions. */
8039 TYPE_PRECISION (fpreg_type) = 80;
8040 layout_type (fpreg_type);
8041 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8043 /* The __float80 type. */
8044 float80_type = make_node (REAL_TYPE);
8045 TYPE_PRECISION (float80_type) = 80;
8046 layout_type (float80_type);
8047 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8049 /* The __float128 type. */
8052 tree float128_type = make_node (REAL_TYPE);
8053 TYPE_PRECISION (float128_type) = 128;
8054 layout_type (float128_type);
8055 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8058 /* Under HPUX, this is a synonym for "long double". */
8059 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8062 #define def_builtin(name, type, code) \
8063 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
8066 def_builtin ("__builtin_ia64_bsp",
8067 build_function_type (ptr_type_node, void_list_node),
8070 def_builtin ("__builtin_ia64_flushrs",
8071 build_function_type (void_type_node, void_list_node),
8072 IA64_BUILTIN_FLUSHRS);
8078 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8079 enum machine_mode mode ATTRIBUTE_UNUSED,
8080 int ignore ATTRIBUTE_UNUSED)
8082 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8083 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8087 case IA64_BUILTIN_BSP:
8088 if (! target || ! register_operand (target, DImode))
8089 target = gen_reg_rtx (DImode);
8090 emit_insn (gen_bsp_value (target));
8091 #ifdef POINTERS_EXTEND_UNSIGNED
8092 target = convert_memory_address (ptr_mode, target);
8096 case IA64_BUILTIN_FLUSHRS:
8097 emit_insn (gen_flushrs ());
8107 /* On HP-UX IA64, aggregate parameters are passed stored in the
8108 most significant bits of the stack slot. */
8111 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8113 /* Exception to normal case for structures/unions/etc. */
8115 if (type && AGGREGATE_TYPE_P (type)
8116 && int_size_in_bytes (type) < UNITS_PER_WORD)
8119 /* Fall back to the default. */
8120 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8123 /* Linked list of all external functions that are to be emitted by GCC.
8124 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8125 order to avoid putting out names that are never really used. */
8127 struct extern_func_list GTY(())
8129 struct extern_func_list *next;
8133 static GTY(()) struct extern_func_list *extern_func_head;
8136 ia64_hpux_add_extern_decl (tree decl)
8138 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8141 p->next = extern_func_head;
8142 extern_func_head = p;
8145 /* Print out the list of used global functions. */
8148 ia64_hpux_file_end (void)
8150 struct extern_func_list *p;
8152 for (p = extern_func_head; p; p = p->next)
8154 tree decl = p->decl;
8155 tree id = DECL_ASSEMBLER_NAME (decl);
8159 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8161 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8163 TREE_ASM_WRITTEN (decl) = 1;
8164 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8165 fputs (TYPE_ASM_OP, asm_out_file);
8166 assemble_name (asm_out_file, name);
8167 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8171 extern_func_head = 0;
8174 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
8175 modes of word_mode and larger. Rename the TFmode libfuncs using the
8176 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8177 backward compatibility. */
8180 ia64_init_libfuncs (void)
8182 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8183 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8184 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8185 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8187 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8188 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8189 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8190 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8191 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8193 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8194 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8195 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8196 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8197 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8198 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8200 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8201 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8202 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8203 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8205 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8206 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8209 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8212 ia64_hpux_init_libfuncs (void)
8214 ia64_init_libfuncs ();
8216 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8217 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8218 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8220 /* ia64_expand_compare uses this. */
8221 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8223 /* These should never be used. */
8224 set_optab_libfunc (eq_optab, TFmode, 0);
8225 set_optab_libfunc (ne_optab, TFmode, 0);
8226 set_optab_libfunc (gt_optab, TFmode, 0);
8227 set_optab_libfunc (ge_optab, TFmode, 0);
8228 set_optab_libfunc (lt_optab, TFmode, 0);
8229 set_optab_libfunc (le_optab, TFmode, 0);
8232 /* Rename the division and modulus functions in VMS. */
8235 ia64_vms_init_libfuncs (void)
8237 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8238 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8239 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8240 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8241 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8242 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8243 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8244 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8247 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8248 the HPUX conventions. */
8251 ia64_sysv4_init_libfuncs (void)
8253 ia64_init_libfuncs ();
8255 /* These functions are not part of the HPUX TFmode interface. We
8256 use them instead of _U_Qfcmp, which doesn't work the way we want. */
8258 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8259 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8260 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8261 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8262 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8263 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8265 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8266 glibc doesn't have them. */
8269 /* Switch to the section to which we should output X. The only thing
8270 special we do here is to honor small data. */
8273 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8274 unsigned HOST_WIDE_INT align)
8276 if (GET_MODE_SIZE (mode) > 0
8277 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8280 default_elf_select_rtx_section (mode, x, align);
8283 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8284 Pretend flag_pic is always set. */
8287 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8289 default_elf_select_section_1 (exp, reloc, align, true);
8293 ia64_rwreloc_unique_section (tree decl, int reloc)
8295 default_unique_section_1 (decl, reloc, true);
8299 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8300 unsigned HOST_WIDE_INT align)
8302 int save_pic = flag_pic;
8304 ia64_select_rtx_section (mode, x, align);
8305 flag_pic = save_pic;
8308 #ifndef TARGET_RWRELOC
8309 #define TARGET_RWRELOC flag_pic
8313 ia64_section_type_flags (tree decl, const char *name, int reloc)
8315 unsigned int flags = 0;
8317 if (strcmp (name, ".sdata") == 0
8318 || strncmp (name, ".sdata.", 7) == 0
8319 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8320 || strncmp (name, ".sdata2.", 8) == 0
8321 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8322 || strcmp (name, ".sbss") == 0
8323 || strncmp (name, ".sbss.", 6) == 0
8324 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8325 flags = SECTION_SMALL;
8327 flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8331 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8332 structure type and that the address of that type should be passed
8333 in out0, rather than in r8. */
8336 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8338 tree ret_type = TREE_TYPE (fntype);
8340 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8341 as the structure return address parameter, if the return value
8342 type has a non-trivial copy constructor or destructor. It is not
8343 clear if this same convention should be used for other
8344 programming languages. Until G++ 3.4, we incorrectly used r8 for
8345 these return values. */
8346 return (abi_version_at_least (2)
8348 && TYPE_MODE (ret_type) == BLKmode
8349 && TREE_ADDRESSABLE (ret_type)
8350 && strcmp (lang_hooks.name, "GNU C++") == 0);
8353 /* Output the assembler code for a thunk function. THUNK_DECL is the
8354 declaration for the thunk function itself, FUNCTION is the decl for
8355 the target function. DELTA is an immediate constant offset to be
8356 added to THIS. If VCALL_OFFSET is nonzero, the word at
8357 *(*this + vcall_offset) should be added to THIS. */
8360 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8361 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8364 rtx this, insn, funexp;
8365 unsigned int this_parmno;
8366 unsigned int this_regno;
8368 reload_completed = 1;
8369 epilogue_completed = 1;
8371 reset_block_changes ();
8373 /* Set things up as ia64_expand_prologue might. */
8374 last_scratch_gr_reg = 15;
8376 memset (&current_frame_info, 0, sizeof (current_frame_info));
8377 current_frame_info.spill_cfa_off = -16;
8378 current_frame_info.n_input_regs = 1;
8379 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8381 /* Mark the end of the (empty) prologue. */
8382 emit_note (NOTE_INSN_PROLOGUE_END);
8384 /* Figure out whether "this" will be the first parameter (the
8385 typical case) or the second parameter (as happens when the
8386 virtual function returns certain class objects). */
8388 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8390 this_regno = IN_REG (this_parmno);
8391 if (!TARGET_REG_NAMES)
8392 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8394 this = gen_rtx_REG (Pmode, this_regno);
8397 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8398 REG_POINTER (tmp) = 1;
8399 if (delta && CONST_OK_FOR_I (delta))
8401 emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8405 emit_insn (gen_ptr_extend (this, tmp));
8408 /* Apply the constant offset, if required. */
8411 rtx delta_rtx = GEN_INT (delta);
8413 if (!CONST_OK_FOR_I (delta))
8415 rtx tmp = gen_rtx_REG (Pmode, 2);
8416 emit_move_insn (tmp, delta_rtx);
8419 emit_insn (gen_adddi3 (this, this, delta_rtx));
8422 /* Apply the offset from the vtable, if required. */
8425 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8426 rtx tmp = gen_rtx_REG (Pmode, 2);
8430 rtx t = gen_rtx_REG (ptr_mode, 2);
8431 REG_POINTER (t) = 1;
8432 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8433 if (CONST_OK_FOR_I (vcall_offset))
8435 emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8440 emit_insn (gen_ptr_extend (tmp, t));
8443 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8447 if (!CONST_OK_FOR_J (vcall_offset))
8449 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8450 emit_move_insn (tmp2, vcall_offset_rtx);
8451 vcall_offset_rtx = tmp2;
8453 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8457 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8458 gen_rtx_MEM (ptr_mode, tmp));
8460 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8462 emit_insn (gen_adddi3 (this, this, tmp));
8465 /* Generate a tail call to the target function. */
8466 if (! TREE_USED (function))
8468 assemble_external (function);
8469 TREE_USED (function) = 1;
8471 funexp = XEXP (DECL_RTL (function), 0);
8472 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8473 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8474 insn = get_last_insn ();
8475 SIBLING_CALL_P (insn) = 1;
8477 /* Code generation for calls relies on splitting. */
8478 reload_completed = 1;
8479 epilogue_completed = 1;
8480 try_split (PATTERN (insn), insn, 0);
8484 /* Run just enough of rest_of_compilation to get the insns emitted.
8485 There's not really enough bulk here to make other passes such as
8486 instruction scheduling worthwhile. Note that use_thunk calls
8487 assemble_start_function and assemble_end_function. */
8489 insn_locators_initialize ();
8490 emit_all_insn_group_barriers (NULL);
8491 insn = get_insns ();
8492 shorten_branches (insn);
8493 final_start_function (insn, file, 1);
8494 final (insn, file, 1);
8495 final_end_function ();
8497 reload_completed = 0;
8498 epilogue_completed = 0;
8502 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8505 ia64_struct_value_rtx (tree fntype,
8506 int incoming ATTRIBUTE_UNUSED)
8508 if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8510 return gen_rtx_REG (Pmode, GR_REG (8));
8514 ia64_scalar_mode_supported_p (enum machine_mode mode)
8539 ia64_vector_mode_supported_p (enum machine_mode mode)
8556 #include "gt-ia64.h"