/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
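/* (An adds instruction takes a 14-bit immediate, addl a 22-bit immediate,
   and movl a full 64-bit one, so the default of 22 corresponds to addl.)  */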

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
					 int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
				     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",           1, 1, true,  false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
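
/* An illustrative use (not code from this file):

       static int counter __attribute__ ((model ("small")));

   places COUNTER in the small address area, so its address can be formed
   with a single addl rather than a movl.  */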

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl, decl);
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("%Jaddress area of '%s' conflicts with previous "
		 "declaration", decl, decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
	     decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and one that is either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
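  /* For instance, a mask of 0x3f8 with a shift of 3 comes down to 0x7f,
     seven consecutive 1s, so the field length returned is 7.  */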
  return exact_log2 (op + 1);
}

/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (src))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x3fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
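      /* For instance, an offset of 0x2100 splits into hi = 0x4000 and
	 lo = -0x1f00; the low part fits the signed 14-bit immediate of
	 an adds instruction.  */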
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
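      /* The IA-64 software conventions reserve r13 as the thread pointer,
	 so this hard register never changes during execution.  */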
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
	}
      else
	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if (GET_CODE (op1) == SYMBOL_REF
	  && (tls_kind = SYMBOL_REF_TLS_MODEL (op1)))
	return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
	{
	  ia64_expand_load_address (op0, op1);
	  return NULL_RTX;
	}
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
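
/* As an illustration of the REG case below: a TImode load from (mem (reg P))
   becomes a DImode load through a POST_INC of P followed by a DImode load
   at offset 8 through a POST_DEC that restores P, the postmodify being
   omitted when P is known to die.  */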

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    if (reversed || dead) abort ();
	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    if (reversed || dead) abort ();
	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    if (reversed || dead) abort ();
	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
	      abort ();
	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
	      {
		/* Again the postmodify cannot be made to match, but
		   in this case it's more efficient to get rid of the
		   postmodify entirely and fix up with an add insn.  */
		out[1] = adjust_automodify_address (in, DImode, base, 8);
		fixup = gen_adddi3 (base, base,
				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
	      }
	    else
	      {
		/* Combined offset still fits in the displacement field.
		   (We cannot overflow it at the high end.)  */
		out[1] = adjust_automodify_address
		  (in, DImode,
		   gen_rtx_POST_MODIFY (Pmode, base,
		     gen_rtx_PLUS (Pmode, base,
				   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		   8);
	      }
	    break;

	  default:
	    abort ();
	  }
      }
      break;

    default:
      abort ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\
					  REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, XFmode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (XFmode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument that indicates what to do.
     The return value is an integer to be compared against zero.  */
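  /* For example, a TFmode a < b is emitted as the equivalent of
     _U_Qfcmp (a, b, QCMP_LT | QCMP_INV) != 0.  */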
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;
      if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
	abort ();
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: abort ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      if (regs_ever_live[AR_PFS_REGNUM])
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
	  if (current_frame_info.reg_save_ar_pfs == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */
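
/* With two iterators, for instance, successive saves alternate between two
   base registers, so back-to-back stores need not serialize on the
   post-increment of a single pointer.  */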

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					   REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
	  rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
2001 /* Called after register allocation to add any instructions needed for the
2002 prologue. Using a prologue insn is preferred over putting all of the
2003 instructions in output_function_prologue(), since it allows the scheduler
2004 to intermix instructions with the saves of the call-saved registers. In
2005 some cases, it might be necessary to emit a barrier instruction as the last
2006 insn to prevent such scheduling.
2008 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2009 so that the debug info generation code can handle them properly.
2011 The register save area is laid out like so:
2013 [ varargs spill area ]
2014 [ fr register spill area ]
2015 [ br register spill area ]
2016 [ ar register spill area ]
2017 [ pr register spill area ]
2018 [ gr register spill area ] */
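/* A sketch of how cfa_off walks this area in the spill loops below;
   the boundaries all come from current_frame_info:

	spill_cfa_off + spill_size + extra_spill_size
		-- cfa_off starts here: pr, ar, and b0 saves that
		   did not get GR homes
	spill_cfa_off + spill_size
		-- checked when we reach the base of the gr/br/fr saves
	spill_cfa_off
		-- checked once all spills have been emitted  */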
2020 /* ??? Get inefficient code when the frame size is larger than can fit in an
2021 adds instruction. */
2024 ia64_expand_prologue (void)
2026 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2027 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2030 ia64_compute_frame_size (get_frame_size ());
2031 last_scratch_gr_reg = 15;
2033 /* If there is no epilogue, then we don't need some prologue insns.
2034 We need to avoid emitting the dead prologue insns, because flow
2035 will complain about them. */
2040 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2041 if ((e->flags & EDGE_FAKE) == 0
2042 && (e->flags & EDGE_FALLTHRU) != 0)
2044 epilogue_p = (e != NULL);
2049 /* Set the local, input, and output register names. We need to do this
2050 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2051 half. If we use in/loc/out register names, then we get assembler errors
2052 in crtn.S because there is no alloc insn or regstk directive in there. */
2053 if (! TARGET_REG_NAMES)
2055 int inputs = current_frame_info.n_input_regs;
2056 int locals = current_frame_info.n_local_regs;
2057 int outputs = current_frame_info.n_output_regs;
2059 for (i = 0; i < inputs; i++)
2060 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2061 for (i = 0; i < locals; i++)
2062 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2063 for (i = 0; i < outputs; i++)
2064 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2067 /* Set the frame pointer register name. The regnum is logically loc79,
2068 but of course we'll not have allocated that many locals. Rather than
2069 worrying about renumbering the existing rtxs, we adjust the name. */
2070 /* ??? This code means that we can never use one local register when
2071 there is a frame pointer. loc79 gets wasted in this case, as it is
2072 renamed to a register that will never be used. See also the try_locals
2073 code in find_gr_spill. */
2074 if (current_frame_info.reg_fp)
2076 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2077 reg_names[HARD_FRAME_POINTER_REGNUM]
2078 = reg_names[current_frame_info.reg_fp];
2079 reg_names[current_frame_info.reg_fp] = tmp;
2082 /* We don't need an alloc instruction if we've used no outputs or locals. */
2083 if (current_frame_info.n_local_regs == 0
2084 && current_frame_info.n_output_regs == 0
2085 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2086 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2088 /* If there is no alloc, but there are input registers used, then we
2089 need a .regstk directive. */
2090 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2091 ar_pfs_save_reg = NULL_RTX;
2095 current_frame_info.need_regstk = 0;
2097 if (current_frame_info.reg_save_ar_pfs)
2098 regno = current_frame_info.reg_save_ar_pfs;
2100 regno = next_scratch_gr_reg ();
2101 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2103 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2104 GEN_INT (current_frame_info.n_input_regs),
2105 GEN_INT (current_frame_info.n_local_regs),
2106 GEN_INT (current_frame_info.n_output_regs),
2107 GEN_INT (current_frame_info.n_rotate_regs)));
2108 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2111 /* Set up frame pointer, stack pointer, and spill iterators. */
2113 n_varargs = cfun->machine->n_varargs;
2114 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2115 stack_pointer_rtx, 0);
2117 if (frame_pointer_needed)
2119 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2120 RTX_FRAME_RELATED_P (insn) = 1;
2123 if (current_frame_info.total_size != 0)
2125 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2128 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2129 offset = frame_size_rtx;
2132 regno = next_scratch_gr_reg ();
2133 offset = gen_rtx_REG (DImode, regno);
2134 emit_move_insn (offset, frame_size_rtx);
2137 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2138 stack_pointer_rtx, offset));
2140 if (! frame_pointer_needed)
2142 RTX_FRAME_RELATED_P (insn) = 1;
2143 if (GET_CODE (offset) != CONST_INT)
2146 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2147 gen_rtx_SET (VOIDmode,
2149 gen_rtx_PLUS (DImode,
2156 /* ??? At this point we must generate a magic insn that appears to
2157 modify the stack pointer, the frame pointer, and all spill
2158 iterators. This would allow the most scheduling freedom. For
2159 now, just hard stop. */
2160 emit_insn (gen_blockage ());
2163 /* Must copy out ar.unat before doing any integer spills. */
2164 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2166 if (current_frame_info.reg_save_ar_unat)
2168 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2171 alt_regno = next_scratch_gr_reg ();
2172 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2173 current_frame_info.gr_used_mask |= 1 << alt_regno;
2176 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2177 insn = emit_move_insn (ar_unat_save_reg, reg);
2178 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2180 /* Even if we're not going to generate an epilogue, we still
2181 need to save the register so that EH works. */
2182 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2183 emit_insn (gen_prologue_use (ar_unat_save_reg));
2186 ar_unat_save_reg = NULL_RTX;
2188 /* Spill all varargs registers. Do this before spilling any GR registers,
2189 since we want the UNAT bits for the GR registers to override the UNAT
2190 bits from varargs, which we don't care about. */
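/* For example, assuming MAX_ARGUMENT_SLOTS is 8: for `int f (int a, ...)',
   ia64_setup_incoming_varargs computes n_varargs == 7, and the loop below
   spills the incoming argument registers GR_ARG_FIRST+7 down through
   GR_ARG_FIRST+1, leaving only the named argument's register alone.  */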
2193 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2195 reg = gen_rtx_REG (DImode, regno);
2196 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2199 /* Locate the bottom of the register save area. */
2200 cfa_off = (current_frame_info.spill_cfa_off
2201 + current_frame_info.spill_size
2202 + current_frame_info.extra_spill_size);
2204 /* Save the predicate register block either in a register or in memory. */
2205 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2207 reg = gen_rtx_REG (DImode, PR_REG (0));
2208 if (current_frame_info.reg_save_pr != 0)
2210 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2211 insn = emit_move_insn (alt_reg, reg);
2213 /* ??? Denote pr spill/fill by a DImode move that modifies all
2214 64 hard registers. */
2215 RTX_FRAME_RELATED_P (insn) = 1;
2217 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2218 gen_rtx_SET (VOIDmode, alt_reg, reg),
2221 /* Even if we're not going to generate an epilogue, we still
2222 need to save the register so that EH works. */
2224 emit_insn (gen_prologue_use (alt_reg));
2228 alt_regno = next_scratch_gr_reg ();
2229 alt_reg = gen_rtx_REG (DImode, alt_regno);
2230 insn = emit_move_insn (alt_reg, reg);
2231 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2236 /* Handle AR regs in numerical order. All of them get special handling. */
2237 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2238 && current_frame_info.reg_save_ar_unat == 0)
2240 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2241 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2245 /* The alloc insn already copied ar.pfs into a general register. The
2246 only thing we have to do now is copy that register to a stack slot
2247 if we'd not allocated a local register for the job. */
2248 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2249 && current_frame_info.reg_save_ar_pfs == 0)
2251 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2252 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2256 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2258 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2259 if (current_frame_info.reg_save_ar_lc != 0)
2261 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2262 insn = emit_move_insn (alt_reg, reg);
2263 RTX_FRAME_RELATED_P (insn) = 1;
2265 /* Even if we're not going to generate an epilogue, we still
2266 need to save the register so that EH works. */
2268 emit_insn (gen_prologue_use (alt_reg));
2272 alt_regno = next_scratch_gr_reg ();
2273 alt_reg = gen_rtx_REG (DImode, alt_regno);
2274 emit_move_insn (alt_reg, reg);
2275 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2280 if (current_frame_info.reg_save_gp)
2282 insn = emit_move_insn (gen_rtx_REG (DImode,
2283 current_frame_info.reg_save_gp),
2284 pic_offset_table_rtx);
2285 /* We don't know for sure yet if this is actually needed, since
2286 we've not split the PIC call patterns. If all of the calls
2287 are indirect, and not followed by any uses of the gp, then
2288 this save is dead. Allow it to go away. */
2290 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2293 /* We should now be at the base of the gr/br/fr spill area. */
2294 if (cfa_off != (current_frame_info.spill_cfa_off
2295 + current_frame_info.spill_size))
2298 /* Spill all general registers. */
2299 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2300 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2302 reg = gen_rtx_REG (DImode, regno);
2303 do_spill (gen_gr_spill, reg, cfa_off, reg);
2307 /* Handle BR0 specially -- it may be getting stored permanently in
2308 some GR register. */
2309 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2311 reg = gen_rtx_REG (DImode, BR_REG (0));
2312 if (current_frame_info.reg_save_b0 != 0)
2314 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2315 insn = emit_move_insn (alt_reg, reg);
2316 RTX_FRAME_RELATED_P (insn) = 1;
2318 /* Even if we're not going to generate an epilogue, we still
2319 need to save the register so that EH works. */
2321 emit_insn (gen_prologue_use (alt_reg));
2325 alt_regno = next_scratch_gr_reg ();
2326 alt_reg = gen_rtx_REG (DImode, alt_regno);
2327 emit_move_insn (alt_reg, reg);
2328 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2333 /* Spill the rest of the BR registers. */
2334 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2335 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2337 alt_regno = next_scratch_gr_reg ();
2338 alt_reg = gen_rtx_REG (DImode, alt_regno);
2339 reg = gen_rtx_REG (DImode, regno);
2340 emit_move_insn (alt_reg, reg);
2341 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2345 /* Align the frame and spill all FR registers. */
2346 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2347 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2351 reg = gen_rtx_REG (XFmode, regno);
2352 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2356 if (cfa_off != current_frame_info.spill_cfa_off)
2359 finish_spill_pointers ();
2362 /* Called after register allocation to add any instructions needed for the
2363 epilogue. Using an epilogue insn is preferred over putting all of the
2364 instructions in output_function_epilogue(), since it allows the scheduler
2365 to intermix instructions with the restores of the call-saved registers. In
2366 some cases, it might be necessary to emit a barrier instruction as the last
2367 insn to prevent such scheduling. */
2370 ia64_expand_epilogue (int sibcall_p)
2372 rtx insn, reg, alt_reg, ar_unat_save_reg;
2373 int regno, alt_regno, cfa_off;
2375 ia64_compute_frame_size (get_frame_size ());
2377 /* If there is a frame pointer, then we use it instead of the stack
2378 pointer, so that the stack pointer does not need to be valid when
2379 the epilogue starts. See EXIT_IGNORE_STACK. */
2380 if (frame_pointer_needed)
2381 setup_spill_pointers (current_frame_info.n_spilled,
2382 hard_frame_pointer_rtx, 0);
2384 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2385 current_frame_info.total_size);
2387 if (current_frame_info.total_size != 0)
2389 /* ??? At this point we must generate a magic insn that appears to
2390 modify the spill iterators and the frame pointer. This would
2391 allow the most scheduling freedom. For now, just hard stop. */
2392 emit_insn (gen_blockage ());
2395 /* Locate the bottom of the register save area. */
2396 cfa_off = (current_frame_info.spill_cfa_off
2397 + current_frame_info.spill_size
2398 + current_frame_info.extra_spill_size);
2400 /* Restore the predicate registers. */
2401 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2403 if (current_frame_info.reg_save_pr != 0)
2404 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2407 alt_regno = next_scratch_gr_reg ();
2408 alt_reg = gen_rtx_REG (DImode, alt_regno);
2409 do_restore (gen_movdi_x, alt_reg, cfa_off);
2412 reg = gen_rtx_REG (DImode, PR_REG (0));
2413 emit_move_insn (reg, alt_reg);
2416 /* Restore the application registers. */
2418 /* Load the saved unat from the stack, but do not restore it until
2419 after the GRs have been restored. */
2420 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2422 if (current_frame_info.reg_save_ar_unat != 0)
2424 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2427 alt_regno = next_scratch_gr_reg ();
2428 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2429 current_frame_info.gr_used_mask |= 1 << alt_regno;
2430 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2435 ar_unat_save_reg = NULL_RTX;
2437 if (current_frame_info.reg_save_ar_pfs != 0)
2439 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2440 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2441 emit_move_insn (reg, alt_reg);
2443 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2445 alt_regno = next_scratch_gr_reg ();
2446 alt_reg = gen_rtx_REG (DImode, alt_regno);
2447 do_restore (gen_movdi_x, alt_reg, cfa_off);
2449 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2450 emit_move_insn (reg, alt_reg);
2453 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2455 if (current_frame_info.reg_save_ar_lc != 0)
2456 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2459 alt_regno = next_scratch_gr_reg ();
2460 alt_reg = gen_rtx_REG (DImode, alt_regno);
2461 do_restore (gen_movdi_x, alt_reg, cfa_off);
2464 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2465 emit_move_insn (reg, alt_reg);
2468 /* We should now be at the base of the gr/br/fr spill area. */
2469 if (cfa_off != (current_frame_info.spill_cfa_off
2470 + current_frame_info.spill_size))
2473 /* The GP may be stored on the stack in the prologue, but it's
2474 never restored in the epilogue. Skip the stack slot. */
2475 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2478 /* Restore all general registers. */
2479 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2480 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2482 reg = gen_rtx_REG (DImode, regno);
2483 do_restore (gen_gr_restore, reg, cfa_off);
2487 /* Restore the branch registers. Handle B0 specially, as it may
2488 have gotten stored in some GR register. */
2489 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2491 if (current_frame_info.reg_save_b0 != 0)
2492 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2495 alt_regno = next_scratch_gr_reg ();
2496 alt_reg = gen_rtx_REG (DImode, alt_regno);
2497 do_restore (gen_movdi_x, alt_reg, cfa_off);
2500 reg = gen_rtx_REG (DImode, BR_REG (0));
2501 emit_move_insn (reg, alt_reg);
2504 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2505 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2507 alt_regno = next_scratch_gr_reg ();
2508 alt_reg = gen_rtx_REG (DImode, alt_regno);
2509 do_restore (gen_movdi_x, alt_reg, cfa_off);
2511 reg = gen_rtx_REG (DImode, regno);
2512 emit_move_insn (reg, alt_reg);
2515 /* Restore floating point registers. */
2516 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2517 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2521 reg = gen_rtx_REG (XFmode, regno);
2522 do_restore (gen_fr_restore_x, reg, cfa_off);
2526 /* Restore ar.unat for real. */
2527 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2529 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2530 emit_move_insn (reg, ar_unat_save_reg);
2533 if (cfa_off != current_frame_info.spill_cfa_off)
2536 finish_spill_pointers ();
2538 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2540 /* ??? At this point we must generate a magic insn that appears to
2541 modify the spill iterators, the stack pointer, and the frame
2542 pointer. This would allow the most scheduling freedom. For now,
2544 emit_insn (gen_blockage ());
2547 if (cfun->machine->ia64_eh_epilogue_sp)
2548 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2549 else if (frame_pointer_needed)
2551 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2552 RTX_FRAME_RELATED_P (insn) = 1;
2554 else if (current_frame_info.total_size)
2556 rtx offset, frame_size_rtx;
2558 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2559 if (CONST_OK_FOR_I (current_frame_info.total_size))
2560 offset = frame_size_rtx;
2563 regno = next_scratch_gr_reg ();
2564 offset = gen_rtx_REG (DImode, regno);
2565 emit_move_insn (offset, frame_size_rtx);
2568 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2571 RTX_FRAME_RELATED_P (insn) = 1;
2572 if (GET_CODE (offset) != CONST_INT)
2575 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2576 gen_rtx_SET (VOIDmode,
2578 gen_rtx_PLUS (DImode,
2585 if (cfun->machine->ia64_eh_epilogue_bsp)
2586 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2589 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2592 int fp = GR_REG (2);
2593 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2594 first available call-clobbered register. If there was a frame_pointer
2595 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2596 so we have to make sure we're using the string "r2" when emitting
2597 the register name for the assembler. */
2598 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2599 fp = HARD_FRAME_POINTER_REGNUM;
2601 /* We must emit an alloc to force the input registers to become output
2602 registers. Otherwise, if the callee tries to pass its parameters
2603 through to another call without an intervening alloc, then these
2605 /* ??? We don't need to preserve all input registers. We only need to
2606 preserve those input registers used as arguments to the sibling call.
2607 It is unclear how to compute that number here. */
2608 if (current_frame_info.n_input_regs != 0)
2609 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2610 const0_rtx, const0_rtx,
2611 GEN_INT (current_frame_info.n_input_regs),
2616 /* Return 1 if br.ret can do all the work required to return from a
2620 ia64_direct_return (void)
2622 if (reload_completed && ! frame_pointer_needed)
2624 ia64_compute_frame_size (get_frame_size ());
2626 return (current_frame_info.total_size == 0
2627 && current_frame_info.n_spilled == 0
2628 && current_frame_info.reg_save_b0 == 0
2629 && current_frame_info.reg_save_pr == 0
2630 && current_frame_info.reg_save_ar_pfs == 0
2631 && current_frame_info.reg_save_ar_unat == 0
2632 && current_frame_info.reg_save_ar_lc == 0);
2637 /* Return the magic cookie that we use to hold the return address
2638 during early compilation. */
2641 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
2645 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2648 /* Split this value after reload, now that we know where the return
2649 address is saved. */
2652 ia64_split_return_addr_rtx (rtx dest)
2656 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2658 if (current_frame_info.reg_save_b0 != 0)
2659 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2665 /* Compute offset from CFA for BR0. */
2666 /* ??? Must be kept in sync with ia64_expand_prologue. */
2667 off = (current_frame_info.spill_cfa_off
2668 + current_frame_info.spill_size);
2669 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2670 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2673 /* Convert CFA offset to a register based offset. */
2674 if (frame_pointer_needed)
2675 src = hard_frame_pointer_rtx;
2678 src = stack_pointer_rtx;
2679 off += current_frame_info.total_size;
2682 /* Load address into scratch register. */
2683 if (CONST_OK_FOR_I (off))
2684 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2687 emit_move_insn (dest, GEN_INT (off));
2688 emit_insn (gen_adddi3 (dest, src, dest));
2691 src = gen_rtx_MEM (Pmode, dest);
2695 src = gen_rtx_REG (DImode, BR_REG (0));
2697 emit_move_insn (dest, src);
2701 ia64_hard_regno_rename_ok (int from, int to)
2703 /* Don't clobber any of the registers we reserved for the prologue. */
2704 if (to == current_frame_info.reg_fp
2705 || to == current_frame_info.reg_save_b0
2706 || to == current_frame_info.reg_save_pr
2707 || to == current_frame_info.reg_save_ar_pfs
2708 || to == current_frame_info.reg_save_ar_unat
2709 || to == current_frame_info.reg_save_ar_lc)
2712 if (from == current_frame_info.reg_fp
2713 || from == current_frame_info.reg_save_b0
2714 || from == current_frame_info.reg_save_pr
2715 || from == current_frame_info.reg_save_ar_pfs
2716 || from == current_frame_info.reg_save_ar_unat
2717 || from == current_frame_info.reg_save_ar_lc)
2720 /* Don't use output registers outside the register frame. */
2721 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2724 /* Retain even/oddness on predicate register pairs. */
2725 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2726 return (from & 1) == (to & 1);
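/* E.g. renaming p6 to p8 is fine (both even), but p6 to p7 is rejected,
   since that would break the pairing.  */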
2731 /* Target hook for assembling integer objects. Handle word-sized
2732 aligned objects and detect the cases when @fptr is needed. */
2735 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
2737 if (size == POINTER_SIZE / BITS_PER_UNIT
2739 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2740 && GET_CODE (x) == SYMBOL_REF
2741 && SYMBOL_REF_FUNCTION_P (x))
2743 if (POINTER_SIZE == 32)
2744 fputs ("\tdata4\t@fptr(", asm_out_file);
2746 fputs ("\tdata8\t@fptr(", asm_out_file);
2747 output_addr_const (asm_out_file, x);
2748 fputs (")\n", asm_out_file);
2751 return default_assemble_integer (x, size, aligned_p);
2754 /* Emit the function prologue. */
2757 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2759 int mask, grsave, grsave_prev;
2761 if (current_frame_info.need_regstk)
2762 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2763 current_frame_info.n_input_regs,
2764 current_frame_info.n_local_regs,
2765 current_frame_info.n_output_regs,
2766 current_frame_info.n_rotate_regs);
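/* For example, a function with 2 inputs, 3 locals, 1 output, and no
   rotating registers gets ".regstk 2, 3, 1, 0".  */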
2768 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2771 /* Emit the .prologue directive. */
2774 grsave = grsave_prev = 0;
2775 if (current_frame_info.reg_save_b0 != 0)
2778 grsave = grsave_prev = current_frame_info.reg_save_b0;
2780 if (current_frame_info.reg_save_ar_pfs != 0
2781 && (grsave_prev == 0
2782 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2785 if (grsave_prev == 0)
2786 grsave = current_frame_info.reg_save_ar_pfs;
2787 grsave_prev = current_frame_info.reg_save_ar_pfs;
2789 if (current_frame_info.reg_fp != 0
2790 && (grsave_prev == 0
2791 || current_frame_info.reg_fp == grsave_prev + 1))
2794 if (grsave_prev == 0)
2795 grsave = HARD_FRAME_POINTER_REGNUM;
2796 grsave_prev = current_frame_info.reg_fp;
2798 if (current_frame_info.reg_save_pr != 0
2799 && (grsave_prev == 0
2800 || current_frame_info.reg_save_pr == grsave_prev + 1))
2803 if (grsave_prev == 0)
2804 grsave = current_frame_info.reg_save_pr;
2807 if (mask && TARGET_GNU_AS)
2808 fprintf (file, "\t.prologue %d, %d\n", mask,
2809 ia64_dbx_register_number (grsave));
2811 fputs ("\t.prologue\n", file);
2813 /* Emit a .spill directive, if necessary, to relocate the base of
2814 the register spill area. */
2815 if (current_frame_info.spill_cfa_off != -16)
2816 fprintf (file, "\t.spill %ld\n",
2817 (long) (current_frame_info.spill_cfa_off
2818 + current_frame_info.spill_size));
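/* Taken together, a function might emit (values hypothetical):

	.prologue 12, 32
	.spill -24

   i.e. save mask 12 with the first save GR numbered 32 in the debugger
   numbering, and a spill base of spill_cfa_off + spill_size == -24.  */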
2821 /* Emit the .body directive at the scheduled end of the prologue. */
2824 ia64_output_function_end_prologue (FILE *file)
2826 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2829 fputs ("\t.body\n", file);
2832 /* Emit the function epilogue. */
2835 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
2836 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
2840 if (current_frame_info.reg_fp)
2842 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2843 reg_names[HARD_FRAME_POINTER_REGNUM]
2844 = reg_names[current_frame_info.reg_fp];
2845 reg_names[current_frame_info.reg_fp] = tmp;
2847 if (! TARGET_REG_NAMES)
2849 for (i = 0; i < current_frame_info.n_input_regs; i++)
2850 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2851 for (i = 0; i < current_frame_info.n_local_regs; i++)
2852 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2853 for (i = 0; i < current_frame_info.n_output_regs; i++)
2854 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2857 current_frame_info.initialized = 0;
2861 ia64_dbx_register_number (int regno)
2863 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2864 from its home at loc79 to something inside the register frame. We
2865 must perform the same renumbering here for the debug info. */
2866 if (current_frame_info.reg_fp)
2868 if (regno == HARD_FRAME_POINTER_REGNUM)
2869 regno = current_frame_info.reg_fp;
2870 else if (regno == current_frame_info.reg_fp)
2871 regno = HARD_FRAME_POINTER_REGNUM;
2874 if (IN_REGNO_P (regno))
2875 return 32 + regno - IN_REG (0);
2876 else if (LOC_REGNO_P (regno))
2877 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2878 else if (OUT_REGNO_P (regno))
2879 return (32 + current_frame_info.n_input_regs
2880 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2886 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
2888 rtx addr_reg, eight = GEN_INT (8);
2890 /* The Intel assembler requires that the global __ia64_trampoline symbol
2891 be declared explicitly */
2894 static bool declared_ia64_trampoline = false;
2896 if (!declared_ia64_trampoline)
2898 declared_ia64_trampoline = true;
2899 (*targetm.asm_out.globalize_label) (asm_out_file,
2900 "__ia64_trampoline");
2904 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2905 addr = convert_memory_address (Pmode, addr);
2906 fnaddr = convert_memory_address (Pmode, fnaddr);
2907 static_chain = convert_memory_address (Pmode, static_chain);
2909 /* Load up our iterator. */
2910 addr_reg = gen_reg_rtx (Pmode);
2911 emit_move_insn (addr_reg, addr);
2913 /* The first two words are the fake descriptor:
2914 __ia64_trampoline, ADDR+16. */
2915 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2916 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2917 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2919 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2920 copy_to_reg (plus_constant (addr, 16)));
2921 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2923 /* The third word is the target descriptor. */
2924 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2925 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2927 /* The fourth word is the static chain. */
2928 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
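/* For illustration, the finished trampoline at ADDR looks like:

	ADDR+ 0:  __ia64_trampoline	(fake descriptor: entry point)
	ADDR+ 8:  ADDR+16		(fake descriptor: gp, pointing at
					 the two data words below)
	ADDR+16:  FNADDR		(address of the target's real descriptor)
	ADDR+24:  STATIC_CHAIN

   so the shared entry stub can recover both the real function and the
   static chain through the fake gp.  */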
2931 /* Do any needed setup for a variadic function. CUM has not been updated
2932 for the last named argument, which has type TYPE and mode MODE.
2934 We generate the actual spill instructions during prologue generation. */
2937 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2938 tree type, int * pretend_size,
2939 int second_time ATTRIBUTE_UNUSED)
2941 CUMULATIVE_ARGS next_cum = *cum;
2943 /* Skip the current argument. */
2944 ia64_function_arg_advance (&next_cum, mode, type, 1);
2946 if (next_cum.words < MAX_ARGUMENT_SLOTS)
2948 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
2949 *pretend_size = n * UNITS_PER_WORD;
2950 cfun->machine->n_varargs = n;
2954 /* Check whether TYPE is a homogeneous floating point aggregate. If
2955 it is, return the mode of the floating point type that appears
2956 in all leaves. If it is not, return VOIDmode.
2958 An aggregate is a homogeneous floating point aggregate if all
2959 fields/elements in it have the same floating point type (e.g.,
2960 SFmode). 128-bit quad-precision floats are excluded. */
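/* Some illustrative cases:

	struct { float x, y, z; }	-- HFA, element mode SFmode
	_Complex double			-- HFA, element mode DFmode
	struct { float x; double y; }	-- not an HFA (mixed element modes)
	any quad-precision member	-- never an HFA (excluded above)  */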
2962 static enum machine_mode
2963 hfa_element_mode (tree type, int nested)
2965 enum machine_mode element_mode = VOIDmode;
2966 enum machine_mode mode;
2967 enum tree_code code = TREE_CODE (type);
2968 int know_element_mode = 0;
2973 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2974 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2975 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2976 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2980 /* Fortran complex types are supposed to be HFAs, so we need to handle
2981 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2984 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
2985 && TYPE_MODE (type) != TCmode)
2986 return GET_MODE_INNER (TYPE_MODE (type));
2991 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2992 mode if this is contained within an aggregate. */
2993 if (nested && TYPE_MODE (type) != TFmode)
2994 return TYPE_MODE (type);
2999 return hfa_element_mode (TREE_TYPE (type), 1);
3003 case QUAL_UNION_TYPE:
3004 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3006 if (TREE_CODE (t) != FIELD_DECL)
3009 mode = hfa_element_mode (TREE_TYPE (t), 1);
3010 if (know_element_mode)
3012 if (mode != element_mode)
3015 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3019 know_element_mode = 1;
3020 element_mode = mode;
3023 return element_mode;
3026 /* If we reach here, we probably have some front-end specific type
3027 that the backend doesn't know about. This can happen via the
3028 aggregate_value_p call in init_function_start. All we can do is
3029 ignore unknown tree types. */
3036 /* Return the number of words required to hold a quantity of TYPE and MODE
3037 when passed as an argument. */
3039 ia64_function_arg_words (tree type, enum machine_mode mode)
3043 if (mode == BLKmode)
3044 words = int_size_in_bytes (type);
3046 words = GET_MODE_SIZE (mode);
3048 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
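/* E.g. a 12-byte BLKmode struct needs (12 + 7) / 8 == 2 words.  */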
3051 /* Return the number of registers that should be skipped so the current
3052 argument (described by TYPE and WORDS) will be properly aligned.
3054 Integer and float arguments larger than 8 bytes start at the next
3055 even boundary. Aggregates larger than 8 bytes start at the next
3056 even boundary if the aggregate has 16 byte alignment. Note that
3057 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3058 but are still to be aligned in registers.
3060 ??? The ABI does not specify how to handle aggregates with
3061 alignment from 9 to 15 bytes, or greater than 16. We handle them
3062 all as if they had 16 byte alignment. Such aggregates can occur
3063 only if gcc extensions are used. */
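/* For example, in `void f (int a, __int128 b)', `a' lands in slot 0; `b'
   is a 16-byte integer, so it must start on an even slot:
   ia64_function_arg_offset returns 1 and `b' occupies slots 2 and 3. A
   plain `double' in the same position is only 8 bytes and gets no
   padding slot.  */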
3065 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3067 if ((cum->words & 1) == 0)
3071 && TREE_CODE (type) != INTEGER_TYPE
3072 && TREE_CODE (type) != REAL_TYPE)
3073 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3078 /* Return rtx for register where argument is passed, or zero if it is passed
3080 /* ??? 128-bit quad-precision floats are always passed in general
3084 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3085 int named, int incoming)
3087 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3088 int words = ia64_function_arg_words (type, mode);
3089 int offset = ia64_function_arg_offset (cum, type, words);
3090 enum machine_mode hfa_mode = VOIDmode;
3092 /* If all argument slots are used, then it must go on the stack. */
3093 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3096 /* Check for and handle homogeneous FP aggregates. */
3098 hfa_mode = hfa_element_mode (type, 0);
3100 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3101 and unprototyped hfas are passed specially. */
3102 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3106 int fp_regs = cum->fp_regs;
3107 int int_regs = cum->words + offset;
3108 int hfa_size = GET_MODE_SIZE (hfa_mode);
3112 /* If prototyped, pass it in FR regs then GR regs.
3113 If not prototyped, pass it in both FR and GR regs.
3115 If this is an SFmode aggregate, then it is possible to run out of
3116 FR regs while GR regs are still left. In that case, we pass the
3117 remaining part in the GR regs. */
3119 /* Fill the FP regs. We do this always. We stop if we reach the end
3120 of the argument, the last FP register, or the last argument slot. */
3122 byte_size = ((mode == BLKmode)
3123 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3124 args_byte_size = int_regs * UNITS_PER_WORD;
3126 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3127 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3129 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3130 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3134 args_byte_size += hfa_size;
3138 /* If no prototype, then the whole thing must go in GR regs. */
3139 if (! cum->prototype)
3141 /* If this is an SFmode aggregate, then we might have some left over
3142 that needs to go in GR regs. */
3143 else if (byte_size != offset)
3144 int_regs += offset / UNITS_PER_WORD;
3146 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3148 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3150 enum machine_mode gr_mode = DImode;
3151 unsigned int gr_size;
3153 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3154 then this goes in a GR reg left adjusted/little endian, right
3155 adjusted/big endian. */
3156 /* ??? Currently this is handled wrong, because 4-byte hunks are
3157 always right adjusted/little endian. */
3160 /* If we have an even 4 byte hunk because the aggregate is a
3161 multiple of 4 bytes in size, then this goes in a GR reg right
3162 adjusted/little endian. */
3163 else if (byte_size - offset == 4)
3166 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3167 gen_rtx_REG (gr_mode, (basereg
3171 gr_size = GET_MODE_SIZE (gr_mode);
3173 if (gr_size == UNITS_PER_WORD
3174 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3176 else if (gr_size > UNITS_PER_WORD)
3177 int_regs += gr_size / UNITS_PER_WORD;
3180 /* If we ended up using just one location, just return that one loc, but
3181 change the mode back to the argument mode. However, we can't do this
3182 when hfa_mode is XFmode and mode is TImode. In that case, we would
3183 return a TImode reference to an FP reg, but FP regs can't hold TImode.
3184 We need the PARALLEL to make this work. This can happen for a union
3185 containing a single __float80 member. */
3186 if (i == 1 && ! (hfa_mode == XFmode && mode == TImode))
3187 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3189 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3192 /* Integral and aggregates go in general registers. If we have run out of
3193 FR registers, then FP values must also go in general registers. This can
3194 happen when we have a SFmode HFA. */
3195 else if (mode == TFmode || mode == TCmode
3196 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3198 int byte_size = ((mode == BLKmode)
3199 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3200 if (BYTES_BIG_ENDIAN
3201 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3202 && byte_size < UNITS_PER_WORD
3205 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3206 gen_rtx_REG (DImode,
3207 (basereg + cum->words
3210 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3213 return gen_rtx_REG (mode, basereg + cum->words + offset);
3217 /* If there is a prototype, then FP values go in a FR register when
3218 named, and in a GR register when unnamed. */
3219 else if (cum->prototype)
3222 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3223 /* In big-endian mode, an anonymous SFmode value must be represented
3224 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3225 the value into the high half of the general register. */
3226 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3227 return gen_rtx_PARALLEL (mode,
3229 gen_rtx_EXPR_LIST (VOIDmode,
3230 gen_rtx_REG (DImode, basereg + cum->words + offset),
3233 return gen_rtx_REG (mode, basereg + cum->words + offset);
3235 /* If there is no prototype, then FP values go in both FR and GR
3239 /* See comment above. */
3240 enum machine_mode inner_mode =
3241 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3243 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3244 gen_rtx_REG (mode, (FR_ARG_FIRST
3247 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3248 gen_rtx_REG (inner_mode,
3249 (basereg + cum->words
3253 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
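/* So, for example, an unprototyped call `g (1.5)' passes the double in
   both f8 (FR_ARG_FIRST) and the first output GR, since the callee may
   read either.  */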
3257 /* Return number of words, at the beginning of the argument, that must be
3258 put in registers. 0 if the argument is entirely in registers or entirely
3262 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3263 tree type, int named ATTRIBUTE_UNUSED)
3265 int words = ia64_function_arg_words (type, mode);
3266 int offset = ia64_function_arg_offset (cum, type, words);
3268 /* If all argument slots are used, then it must go on the stack. */
3269 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3272 /* It doesn't matter whether the argument goes in FR or GR regs. If
3273 it fits within the 8 argument slots, then it goes entirely in
3274 registers. If it extends past the last argument slot, then the rest
3275 goes on the stack. */
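/* For example, with MAX_ARGUMENT_SLOTS == 8: a 3-word aggregate whose
   first word lands in slot 6 overflows the slots, so 8 - 6 == 2 words
   go in registers and the remaining word goes on the stack.  */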
3277 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3280 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3283 /* Update CUM to point after this argument. This is patterned after
3284 ia64_function_arg. */
3287 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3288 tree type, int named)
3290 int words = ia64_function_arg_words (type, mode);
3291 int offset = ia64_function_arg_offset (cum, type, words);
3292 enum machine_mode hfa_mode = VOIDmode;
3294 /* If all arg slots are already full, then there is nothing to do. */
3295 if (cum->words >= MAX_ARGUMENT_SLOTS)
3298 cum->words += words + offset;
3300 /* Check for and handle homogeneous FP aggregates. */
3302 hfa_mode = hfa_element_mode (type, 0);
3304 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3305 and unprototyped hfas are passed specially. */
3306 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3308 int fp_regs = cum->fp_regs;
3309 /* This is the original value of cum->words + offset. */
3310 int int_regs = cum->words - words;
3311 int hfa_size = GET_MODE_SIZE (hfa_mode);
3315 /* If prototyped, pass it in FR regs then GR regs.
3316 If not prototyped, pass it in both FR and GR regs.
3318 If this is an SFmode aggregate, then it is possible to run out of
3319 FR regs while GR regs are still left. In that case, we pass the
3320 remaining part in the GR regs. */
3322 /* Fill the FP regs. We do this always. We stop if we reach the end
3323 of the argument, the last FP register, or the last argument slot. */
3325 byte_size = ((mode == BLKmode)
3326 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3327 args_byte_size = int_regs * UNITS_PER_WORD;
3329 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3330 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3333 args_byte_size += hfa_size;
3337 cum->fp_regs = fp_regs;
3340 /* Integral and aggregates go in general registers. If we have run out of
3341 FR registers, then FP values must also go in general registers. This can
3342 happen when we have a SFmode HFA. */
3343 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3344 cum->int_regs = cum->words;
3346 /* If there is a prototype, then FP values go in a FR register when
3347 named, and in a GR register when unnamed. */
3348 else if (cum->prototype)
3351 cum->int_regs = cum->words;
3353 /* ??? Complex types should not reach here. */
3354 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3356 /* If there is no prototype, then FP values go in both FR and GR
3360 /* ??? Complex types should not reach here. */
3361 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3362 cum->int_regs = cum->words;
3366 /* Variable sized types are passed by reference. */
3367 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3370 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3371 enum machine_mode mode ATTRIBUTE_UNUSED,
3372 tree type, bool named ATTRIBUTE_UNUSED)
3374 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
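/* E.g. a GNU C struct containing a variable-length array has a TYPE_SIZE
   that is not an INTEGER_CST and so is passed by reference; anything
   with a compile-time-constant size is passed by value as usual.  */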
3377 /* True if it is OK to do sibling call optimization for the specified
3378 call expression EXP. DECL will be the called function, or NULL if
3379 this is an indirect call. */
3381 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3383 /* We must always return with our current GP. This means we can
3384 only sibcall to functions defined in the current module. */
3385 return decl && (*targetm.binds_local_p) (decl);
3389 /* Implement va_arg. */
3392 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3394 /* Variable sized types are passed by reference. */
3395 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
3397 tree ptrtype = build_pointer_type (type);
3398 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3399 return build_fold_indirect_ref (addr);
3402 /* Aggregate arguments with alignment larger than 8 bytes start at
3403 the next even boundary. Integer and floating point arguments
3404 do so if they are larger than 8 bytes, whether or not they are
3405 also aligned larger than 8 bytes. */
3406 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3407 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3409 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3410 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3411 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3412 build_int_2 (-2 * UNITS_PER_WORD, -1));
3413 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3414 gimplify_and_add (t, pre_p);
3417 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
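/* With UNITS_PER_WORD == 8, the alignment code above computes
   valist = (valist + 15) & -16; e.g. `va_arg (ap, __int128)' first
   rounds ap up to the next 16-byte boundary.  */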
3420 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3424 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
3426 enum machine_mode mode;
3427 enum machine_mode hfa_mode;
3428 HOST_WIDE_INT byte_size;
3430 mode = TYPE_MODE (valtype);
3431 byte_size = GET_MODE_SIZE (mode);
3432 if (mode == BLKmode)
3434 byte_size = int_size_in_bytes (valtype);
3439 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3441 hfa_mode = hfa_element_mode (valtype, 0);
3442 if (hfa_mode != VOIDmode)
3444 int hfa_size = GET_MODE_SIZE (hfa_mode);
3446 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3451 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3457 /* Return rtx for register that holds the function return value. */
3460 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
3462 enum machine_mode mode;
3463 enum machine_mode hfa_mode;
3465 mode = TYPE_MODE (valtype);
3466 hfa_mode = hfa_element_mode (valtype, 0);
3468 if (hfa_mode != VOIDmode)
3476 hfa_size = GET_MODE_SIZE (hfa_mode);
3477 byte_size = ((mode == BLKmode)
3478 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3480 for (i = 0; offset < byte_size; i++)
3482 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3483 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3489 return XEXP (loc[0], 0);
3491 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3493 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
3494 return gen_rtx_REG (mode, FR_ARG_FIRST);
3497 if (BYTES_BIG_ENDIAN
3498 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3506 bytesize = int_size_in_bytes (valtype);
3507 for (i = 0; offset < bytesize; i++)
3509 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3510 gen_rtx_REG (DImode,
3513 offset += UNITS_PER_WORD;
3515 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3518 return gen_rtx_REG (mode, GR_RET_FIRST);
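/* For example: `struct { float x, y, z; }' comes back as the PARALLEL
   (f8, f9, f10) via the HFA path above; a scalar `double' comes back in
   f8; and a plain `int' comes back in GR_RET_FIRST (r8).  */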
3522 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3523 We need to emit DTP-relative relocations. */
3526 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
3530 fputs ("\tdata8.ua\t@dtprel(", file);
3531 output_addr_const (file, x);
3535 /* Print a memory address as an operand to reference that memory location. */
3537 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3538 also call this from ia64_print_operand for memory addresses. */
3541 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
3542 rtx address ATTRIBUTE_UNUSED)
3546 /* Print an operand to an assembler instruction.
3547 C Swap and print a comparison operator.
3548 D Print an FP comparison operator.
3549 E Print 32 - constant, for SImode shifts as extract.
3550 e Print 64 - constant, for DImode rotates.
3551 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3552 a floating point register emitted normally.
3553 I Invert a predicate register by adding 1.
3554 J Select the proper predicate register for a condition.
3555 j Select the inverse predicate register for a condition.
3556 O Append .acq for volatile load.
3557 P Postincrement of a MEM.
3558 Q Append .rel for volatile store.
3559 S Shift amount for shladd instruction.
3560 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3561 for Intel assembler.
3562 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3563 for Intel assembler.
3564 r Print register name, or constant 0 as r0. HP compatibility for
3567 ia64_print_operand (FILE * file, rtx x, int code)
3574 /* Handled below. */
3579 enum rtx_code c = swap_condition (GET_CODE (x));
3580 fputs (GET_RTX_NAME (c), file);
3585 switch (GET_CODE (x))
3597 str = GET_RTX_NAME (GET_CODE (x));
3604 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3608 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3612 if (x == CONST0_RTX (GET_MODE (x)))
3613 str = reg_names [FR_REG (0)];
3614 else if (x == CONST1_RTX (GET_MODE (x)))
3615 str = reg_names [FR_REG (1)];
3616 else if (GET_CODE (x) == REG)
3617 str = reg_names [REGNO (x)];
3624 fputs (reg_names [REGNO (x) + 1], file);
3630 unsigned int regno = REGNO (XEXP (x, 0));
3631 if (GET_CODE (x) == EQ)
3635 fputs (reg_names [regno], file);
3640 if (MEM_VOLATILE_P (x))
3641 fputs(".acq", file);
3646 HOST_WIDE_INT value;
3648 switch (GET_CODE (XEXP (x, 0)))
3654 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3655 if (GET_CODE (x) == CONST_INT)
3657 else if (GET_CODE (x) == REG)
3659 fprintf (file, ", %s", reg_names[REGNO (x)]);
3667 value = GET_MODE_SIZE (GET_MODE (x));
3671 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3675 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3680 if (MEM_VOLATILE_P (x))
3681 fputs(".rel", file);
3685 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3689 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3691 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3697 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3699 const char *prefix = "0x";
3700 if (INTVAL (x) & 0x80000000)
3702 fprintf (file, "0xffffffff");
3705 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3711 /* If this operand is the constant zero, write it as register zero.
3712 Any register, zero, or CONST_INT value is OK here. */
3713 if (GET_CODE (x) == REG)
3714 fputs (reg_names[REGNO (x)], file);
3715 else if (x == CONST0_RTX (GET_MODE (x)))
3717 else if (GET_CODE (x) == CONST_INT)
3718 output_addr_const (file, x);
3720 output_operand_lossage ("invalid %%r value");
3727 /* For conditional branches, returns or calls, substitute
3728 sptk, dptk, dpnt, or spnt for %s. */
3729 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3732 int pred_val = INTVAL (XEXP (x, 0));
3734 /* Guess top and bottom 10% statically predicted. */
3735 if (pred_val < REG_BR_PROB_BASE / 50)
3737 else if (pred_val < REG_BR_PROB_BASE / 2)
3739 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3744 else if (GET_CODE (current_output_insn) == CALL_INSN)
3749 fputs (which, file);
3754 x = current_insn_predicate;
3757 unsigned int regno = REGNO (XEXP (x, 0));
3758 if (GET_CODE (x) == EQ)
3760 fprintf (file, "(%s) ", reg_names [regno]);
3765 output_operand_lossage ("ia64_print_operand: unknown code");
3769 switch (GET_CODE (x))
3771 /* This happens for the spill/restore instructions. */
3776 /* ... fall through ... */
3779 fputs (reg_names [REGNO (x)], file);
3784 rtx addr = XEXP (x, 0);
3785 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
3786 addr = XEXP (addr, 0);
3787 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3792 output_addr_const (file, x);
3799 /* Compute a (partial) cost for rtx X. Return true if the complete
3800 cost has been computed, and false if subexpressions should be
3801 scanned. In either case, *TOTAL contains the cost result. */
3802 /* ??? This is incomplete. */
3805 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3813 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
3816 if (CONST_OK_FOR_I (INTVAL (x)))
3818 else if (CONST_OK_FOR_J (INTVAL (x)))
3821 *total = COSTS_N_INSNS (1);
3824 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
3827 *total = COSTS_N_INSNS (1);
3832 *total = COSTS_N_INSNS (1);
3838 *total = COSTS_N_INSNS (3);
3842 /* For multiplies wider than HImode, we have to go to the FPU,
3843 which normally involves copies. Plus there's the latency
3844 of the multiply itself, and the latency of the instructions to
3845 transfer integer regs to FP regs. */
3846 /* ??? Check for FP mode. */
3847 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
3848 *total = COSTS_N_INSNS (10);
3850 *total = COSTS_N_INSNS (2);
3858 *total = COSTS_N_INSNS (1);
3865 /* We make divide expensive, so that divide-by-constant will be
3866 optimized to a multiply. */
3867 *total = COSTS_N_INSNS (60);
3875 /* Calculate the cost of moving data from a register in class FROM to
3876 one in class TO, using MODE. */
3879 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
3882 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3883 if (to == ADDL_REGS)
3885 if (from == ADDL_REGS)
3888 /* All costs are symmetric, so reduce cases by putting the
3889 lower-numbered class as the destination.
3892 enum reg_class tmp = to;
3893 to = from, from = tmp;
3896 /* Moving from FR<->GR in XFmode must be more expensive than 2,
3897 so that we get secondary memory reloads. Between FR_REGS,
3898 we have to make this at least as expensive as MEMORY_MOVE_COST
3899 to avoid spectacularly poor register class preferencing. */
3902 if (to != GR_REGS || from != GR_REGS)
3903 return MEMORY_MOVE_COST (mode, to, 0);
3911 /* Moving between PR registers takes two insns. */
3912 if (from == PR_REGS)
3914 /* Moving between PR and anything but GR is impossible. */
3915 if (from != GR_REGS)
3916 return MEMORY_MOVE_COST (mode, to, 0);
3920 /* Moving between BR and anything but GR is impossible. */
3921 if (from != GR_REGS && from != GR_AND_BR_REGS)
3922 return MEMORY_MOVE_COST (mode, to, 0);
3927 /* Moving between AR and anything but GR is impossible. */
3928 if (from != GR_REGS)
3929 return MEMORY_MOVE_COST (mode, to, 0);
3934 case GR_AND_FR_REGS:
3935 case GR_AND_BR_REGS:
3946 /* This function returns the register class required for a secondary
3947 register when copying between one of the registers in CLASS, and X,
3948 using MODE. A return value of NO_REGS means that no secondary register
3952 ia64_secondary_reload_class (enum reg_class class,
3953 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3957 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3958 regno = true_regnum (x);
3965 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3966 interaction. We end up with two pseudos with overlapping lifetimes
3967 both of which are equiv to the same constant, and both of which need
3968 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3969 changes depending on the path length, which means the qty_first_reg
3970 check in make_regs_eqv can give different answers at different times.
3971 At some point I'll probably need a reload_indi pattern to handle
3974 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3975 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3976 non-general registers for good measure. */
3977 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3980 /* This is needed if a pseudo used as a call_operand gets spilled to a
3982 if (GET_CODE (x) == MEM)
3987 /* Need to go through general registers to get to other class regs. */
3988 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3991 /* This can happen when a paradoxical subreg is an operand to the
3993 /* ??? This shouldn't be necessary after instruction scheduling is
3994 enabled, because paradoxical subregs are not accepted by
3995 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3996 stop the paradoxical subreg stupidity in the *_operand functions
3998 if (GET_CODE (x) == MEM
3999 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4000 || GET_MODE (x) == QImode))
4003 /* This can happen because of the ior/and/etc patterns that accept FP
4004 registers as operands. If the third operand is a constant, then it
4005 needs to be reloaded into a FP register. */
4006 if (GET_CODE (x) == CONST_INT)
4009 /* This can happen because of register elimination in a muldi3 insn.
4010 E.g. `26107 * (unsigned long)&u'. */
4011 if (GET_CODE (x) == PLUS)
4016 /* ??? This happens if we cse/gcse a BImode value across a call,
4017 and the function has a nonlocal goto. This is because global
4018 does not allocate call crossing pseudos to hard registers when
4019 current_function_has_nonlocal_goto is true. This is relatively
4020 common for C++ programs that use exceptions. To reproduce,
4021 return NO_REGS and compile libstdc++. */
4022 if (GET_CODE (x) == MEM)
4025 /* This can happen when we take a BImode subreg of a DImode value,
4026 and that DImode value winds up in some non-GR register. */
4027 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4039 /* Emit text to declare externally defined variables and functions, because
4040 the Intel assembler does not support undefined externals. */
4043 ia64_asm_output_external (FILE *file, tree decl, const char *name)
4045 int save_referenced;
4047 /* GNU as does not need anything here, but the HP linker does need
4048 something for external functions. */
4052 || TREE_CODE (decl) != FUNCTION_DECL
4053 || strstr (name, "__builtin_") == name))
4056 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4057 the linker when we do this, so we need to be careful not to do this for
4058 builtin functions which have no library equivalent. Unfortunately, we
4059 can't tell here whether or not a function will actually be called by
4060 expand_expr, so we pull in library functions even if we may not need
4062 if (! strcmp (name, "__builtin_next_arg")
4063 || ! strcmp (name, "alloca")
4064 || ! strcmp (name, "__builtin_constant_p")
4065 || ! strcmp (name, "__builtin_args_info"))
4069 ia64_hpux_add_extern_decl (decl);
4072 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4074 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4075 if (TREE_CODE (decl) == FUNCTION_DECL)
4076 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4077 (*targetm.asm_out.globalize_label) (file, name);
4078 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4082 /* Parse the -mfixed-range= option string. */
4085 fix_range (const char *const_str)
4088 char *str, *dash, *comma;
4090 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4091 REG2 are either register names or register numbers. The effect
4092 of this option is to mark the registers in the range from REG1 to
4093 REG2 as ``fixed'' so they won't be used by the compiler. This is
4094 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4096 i = strlen (const_str);
4097 str = (char *) alloca (i + 1);
4098 memcpy (str, const_str, i + 1);
4102 dash = strchr (str, '-');
4105 warning ("value of -mfixed-range must have form REG1-REG2");
4110 comma = strchr (dash + 1, ',');
4114 first = decode_reg_name (str);
4117 warning ("unknown register name: %s", str);
4121 last = decode_reg_name (dash + 1);
4124 warning ("unknown register name: %s", dash + 1);
4132 warning ("%s-%s is an empty range", str, dash + 1);
4136 for (i = first; i <= last; ++i)
4137 fixed_regs[i] = call_used_regs[i] = 1;
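/* Illustrative example (added for exposition; not part of the original
   source): with -mfixed-range=f32-f127 the code above splits the string
   at '-', maps "f32" and "f127" to hard register numbers via
   decode_reg_name, and then the loop marks every register in the range
   as both fixed and call-used, so the compiler will never allocate
   f32-f127. This is how kernel builds avoid the high FP partition. */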
4147 static struct machine_function *
4148 ia64_init_machine_status (void)
4150 return ggc_alloc_cleared (sizeof (struct machine_function));
4153 /* Handle TARGET_OPTIONS switches. */
4156 ia64_override_options (void)
4158 static struct pta
4159 {
4160 const char *const name; /* processor name or nickname. */
4161 const enum processor_type processor;
4162 }
4163 const processor_alias_table[] =
4165 {"itanium", PROCESSOR_ITANIUM},
4166 {"itanium1", PROCESSOR_ITANIUM},
4167 {"merced", PROCESSOR_ITANIUM},
4168 {"itanium2", PROCESSOR_ITANIUM2},
4169 {"mckinley", PROCESSOR_ITANIUM2},
4172 int const pta_size = ARRAY_SIZE (processor_alias_table);
4175 if (TARGET_AUTO_PIC)
4176 target_flags |= MASK_CONST_GP;
4178 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4180 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4181 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4183 warning ("cannot optimize floating point division for both latency and throughput");
4184 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4188 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4189 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4191 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4195 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4197 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4198 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4200 warning ("cannot optimize integer division for both latency and throughput");
4201 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4205 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4206 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4208 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4212 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4214 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4215 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4217 warning ("cannot optimize square root for both latency and throughput");
4218 target_flags &= ~MASK_INLINE_SQRT_THR;
4222 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4223 target_flags &= ~MASK_INLINE_SQRT_LAT;
4225 target_flags &= ~MASK_INLINE_SQRT_THR;
4229 if (TARGET_INLINE_SQRT_LAT)
4231 warning ("not yet implemented: latency-optimized inline square root");
4232 target_flags &= ~MASK_INLINE_SQRT_LAT;
4235 if (ia64_fixed_range_string)
4236 fix_range (ia64_fixed_range_string);
4238 if (ia64_tls_size_string)
4241 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4242 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4243 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4245 ia64_tls_size = tmp;
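/* Example (added for exposition): -mtls-size=22 parses to tmp == 22,
   which is one of the three accepted TLS displacement sizes (14, 22
   or 64 bits), so ia64_tls_size becomes 22; a value such as 16 would
   trigger the "bad value" error above instead. */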
4248 if (!ia64_tune_string)
4249 ia64_tune_string = "itanium2";
4251 for (i = 0; i < pta_size; i++)
4252 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4254 ia64_tune = processor_alias_table[i].processor;
4259 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4261 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4262 flag_schedule_insns_after_reload = 0;
4264 /* Variable tracking should be run after all optimizations which change
4265 the order of insns. It also needs a valid CFG. */
4266 ia64_flag_var_tracking = flag_var_tracking;
4267 flag_var_tracking = 0;
4269 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4271 init_machine_status = ia64_init_machine_status;
4274 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4275 static enum attr_type ia64_safe_type (rtx);
4277 static enum attr_itanium_class
4278 ia64_safe_itanium_class (rtx insn)
4280 if (recog_memoized (insn) >= 0)
4281 return get_attr_itanium_class (insn);
4283 return ITANIUM_CLASS_UNKNOWN;
4286 static enum attr_type
4287 ia64_safe_type (rtx insn)
4289 if (recog_memoized (insn) >= 0)
4290 return get_attr_type (insn);
4292 return TYPE_UNKNOWN;
4295 /* The following collection of routines emit instruction group stop bits as
4296 necessary to avoid dependencies. */
4298 /* Need to track some additional registers as far as serialization is
4299 concerned so we can properly handle br.call and br.ret. We could
4300 make these registers visible to gcc, but since these registers are
4301 never explicitly used in gcc generated code, it seems wasteful to
4302 do so (plus it would make the call and return patterns needlessly
4303 complex). */
4304 #define REG_RP (BR_REG (0))
4305 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4306 /* This is used for volatile asms which may require a stop bit immediately
4307 before and after them. */
4308 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4309 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4310 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
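/* Example (added for exposition): REG_AR_CFM, REG_VOLATILE and the 64
   AR_UNAT bits are given indices just past FIRST_PSEUDO_REGISTER, so
   the rws_sum/rws_insn arrays below can track them with plain array
   indexing even though gcc never allocates these registers itself;
   REG_RP simply aliases the real hard register BR_REG (0). */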
4312 /* For each register, we keep track of how it has been written in the
4313 current instruction group.
4315 If a register is written unconditionally (no qualifying predicate),
4316 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4318 If a register is written if its qualifying predicate P is true, we
4319 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4320 may be written again by the complement of P (P^1) and when this happens,
4321 WRITE_COUNT gets set to 2.
4323 The result of this is that whenever an insn attempts to write a register
4324 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4326 If a predicate register is written by a floating-point insn, we set
4327 WRITTEN_BY_FP to true.
4329 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4330 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4332 struct reg_write_state
4334 unsigned int write_count : 2;
4335 unsigned int first_pred : 16;
4336 unsigned int written_by_fp : 1;
4337 unsigned int written_by_and : 1;
4338 unsigned int written_by_or : 1;
4341 /* Cumulative info for the current instruction group. */
4342 struct reg_write_state rws_sum[NUM_REGS];
4343 /* Info for the current instruction. This gets copied to rws_sum after a
4344 stop bit is emitted. */
4345 struct reg_write_state rws_insn[NUM_REGS];
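/* Illustrative walk-through (added for exposition) of the WRITE_COUNT
   protocol described above, assuming p6 and p7 form a complementary
   predicate pair (p7 == p6 ^ 1):

       (p6) mov r8 = 1     // write_count: 0 -> 1, first_pred = p6
       (p7) mov r8 = 2     // complement of first_pred: write_count -> 2
            mov r8 = 3     // write_count is already 2: stop bit needed

   The first two writes can share an instruction group because at most
   one of them executes; the third write forces a group barrier. */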
4347 /* Indicates whether this is the first instruction after a stop bit,
4348 in which case we don't need another stop bit. Without this, we hit
4349 the abort in ia64_variable_issue when scheduling an alloc. */
4350 static int first_instruction;
4352 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4353 RTL for one instruction. */
4354 struct reg_flags
4355 {
4356 unsigned int is_write : 1; /* Is register being written? */
4357 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4358 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4359 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4360 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4361 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4364 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4365 static int rws_access_regno (int, struct reg_flags, int);
4366 static int rws_access_reg (rtx, struct reg_flags, int);
4367 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4368 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4369 static int rtx_needs_barrier (rtx, struct reg_flags, int);
4370 static void init_insn_group_barriers (void);
4371 static int group_barrier_needed_p (rtx);
4372 static int safe_group_barrier_needed_p (rtx);
4374 /* Update *RWS for REGNO, which is being written by the current instruction,
4375 with predicate PRED, and associated register flags in FLAGS. */
4378 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4381 rws[regno].write_count++;
4383 rws[regno].write_count = 2;
4384 rws[regno].written_by_fp |= flags.is_fp;
4385 /* ??? Not tracking and/or across differing predicates. */
4386 rws[regno].written_by_and = flags.is_and;
4387 rws[regno].written_by_or = flags.is_or;
4388 rws[regno].first_pred = pred;
4391 /* Handle an access to register REGNO of type FLAGS using predicate register
4392 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4393 a dependency with an earlier instruction in the same group. */
4396 rws_access_regno (int regno, struct reg_flags flags, int pred)
4398 int need_barrier = 0;
4400 if (regno >= NUM_REGS)
4403 if (! PR_REGNO_P (regno))
4404 flags.is_and = flags.is_or = 0;
4410 /* One insn writes same reg multiple times? */
4411 if (rws_insn[regno].write_count > 0)
4414 /* Update info for current instruction. */
4415 rws_update (rws_insn, regno, flags, pred);
4416 write_count = rws_sum[regno].write_count;
4418 switch (write_count)
4421 /* The register has not been written yet. */
4422 rws_update (rws_sum, regno, flags, pred);
4426 /* The register has been written via a predicate. If this is
4427 not a complementary predicate, then we need a barrier. */
4428 /* ??? This assumes that P and P+1 are always complementary
4429 predicates for P even. */
4430 if (flags.is_and && rws_sum[regno].written_by_and)
4432 else if (flags.is_or && rws_sum[regno].written_by_or)
4434 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4436 rws_update (rws_sum, regno, flags, pred);
4440 /* The register has been unconditionally written already. We
4441 need a barrier. */
4442 if (flags.is_and && rws_sum[regno].written_by_and)
4444 else if (flags.is_or && rws_sum[regno].written_by_or)
4448 rws_sum[regno].written_by_and = flags.is_and;
4449 rws_sum[regno].written_by_or = flags.is_or;
4458 if (flags.is_branch)
4460 /* Branches have several RAW exceptions that allow us to avoid
4461 barriers. */
4463 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4464 /* RAW dependencies on branch regs are permissible as long
4465 as the writer is a non-branch instruction. Since we
4466 never generate code that uses a branch register written
4467 by a branch instruction, handling this case is
4468 easy. */
4471 if (REGNO_REG_CLASS (regno) == PR_REGS
4472 && ! rws_sum[regno].written_by_fp)
4473 /* The predicates of a branch are available within the
4474 same insn group as long as the predicate was written by
4475 something other than a floating-point instruction. */
4479 if (flags.is_and && rws_sum[regno].written_by_and)
4481 if (flags.is_or && rws_sum[regno].written_by_or)
4484 switch (rws_sum[regno].write_count)
4487 /* The register has not been written yet. */
4491 /* The register has been written via a predicate. If this is
4492 not a complementary predicate, then we need a barrier. */
4493 /* ??? This assumes that P and P+1 are always complementary
4494 predicates for P even. */
4495 if ((rws_sum[regno].first_pred ^ 1) != pred)
4500 /* The register has been unconditionally written already. We
4501 need a barrier. */
4510 return need_barrier;
4514 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
4516 int regno = REGNO (reg);
4517 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4520 return rws_access_regno (regno, flags, pred);
4523 int need_barrier = 0;
4525 need_barrier |= rws_access_regno (regno + n, flags, pred);
4526 return need_barrier;
4530 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4531 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4534 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
4536 rtx src = SET_SRC (x);
4540 switch (GET_CODE (src))
4546 if (SET_DEST (x) == pc_rtx)
4547 /* X is a conditional branch. */
4551 int is_complemented = 0;
4553 /* X is a conditional move. */
4554 rtx cond = XEXP (src, 0);
4555 if (GET_CODE (cond) == EQ)
4556 is_complemented = 1;
4557 cond = XEXP (cond, 0);
4558 if (GET_CODE (cond) != REG
4559 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4562 if (XEXP (src, 1) == SET_DEST (x)
4563 || XEXP (src, 2) == SET_DEST (x))
4565 /* X is a conditional move that conditionally writes the
4566 destination. */
4568 /* We need another complement in this case. */
4569 if (XEXP (src, 1) == SET_DEST (x))
4570 is_complemented = ! is_complemented;
4572 *ppred = REGNO (cond);
4573 if (is_complemented)
4577 /* ??? If this is a conditional write to the dest, then this
4578 instruction does not actually read one source. This probably
4579 doesn't matter, because that source is also the dest. */
4580 /* ??? Multiple writes to predicate registers are allowed
4581 if they are all AND type compares, or if they are all OR
4582 type compares. We do not generate such instructions
4583 currently. */
4585 /* ... fall through ... */
4588 if (COMPARISON_P (src)
4589 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4590 /* Set pflags->is_fp to 1 so that we know we're dealing
4591 with a floating point comparison when processing the
4592 destination of the SET. */
4595 /* Discover if this is a parallel comparison. We only handle
4596 and.orcm and or.andcm at present, since we must retain a
4597 strict inverse on the predicate pair. */
4598 else if (GET_CODE (src) == AND)
4600 else if (GET_CODE (src) == IOR)
4607 /* Subroutine of rtx_needs_barrier; this function determines whether the
4608 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4609 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4610 for this insn. */
4613 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
4615 int need_barrier = 0;
4617 rtx src = SET_SRC (x);
4619 if (GET_CODE (src) == CALL)
4620 /* We don't need to worry about the result registers that
4621 get written by a subroutine call. */
4622 return rtx_needs_barrier (src, flags, pred);
4623 else if (SET_DEST (x) == pc_rtx)
4625 /* X is a conditional branch. */
4626 /* ??? This seems redundant, as the caller sets this bit for
4627 all JUMP_INSNs. */
4628 flags.is_branch = 1;
4629 return rtx_needs_barrier (src, flags, pred);
4632 need_barrier = rtx_needs_barrier (src, flags, pred);
4634 /* This instruction unconditionally uses a predicate register. */
4636 need_barrier |= rws_access_reg (cond, flags, 0);
4639 if (GET_CODE (dst) == ZERO_EXTRACT)
4641 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4642 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4643 dst = XEXP (dst, 0);
4645 return need_barrier;
4648 /* Handle an access to rtx X of type FLAGS using predicate register
4649 PRED. Return 1 if this access creates a dependency with an earlier
4650 instruction in the same group. */
4653 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
4656 int is_complemented = 0;
4657 int need_barrier = 0;
4658 const char *format_ptr;
4659 struct reg_flags new_flags;
4667 switch (GET_CODE (x))
4670 update_set_flags (x, &new_flags, &pred, &cond);
4671 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4672 if (GET_CODE (SET_SRC (x)) != CALL)
4674 new_flags.is_write = 1;
4675 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4680 new_flags.is_write = 0;
4681 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4683 /* Avoid multiple register writes, in case this is a pattern with
4684 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4685 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4687 new_flags.is_write = 1;
4688 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4689 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4690 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4695 /* X is a predicated instruction. */
4697 cond = COND_EXEC_TEST (x);
4700 need_barrier = rtx_needs_barrier (cond, flags, 0);
4702 if (GET_CODE (cond) == EQ)
4703 is_complemented = 1;
4704 cond = XEXP (cond, 0);
4705 if (GET_CODE (cond) != REG
4706 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4708 pred = REGNO (cond);
4709 if (is_complemented)
4712 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4713 return need_barrier;
4717 /* Clobber & use are for earlier compiler-phases only. */
4722 /* We always emit stop bits for traditional asms. We emit stop bits
4723 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4724 if (GET_CODE (x) != ASM_OPERANDS
4725 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4727 /* Avoid writing the register multiple times if we have multiple
4728 asm outputs. This avoids an abort in rws_access_reg. */
4729 if (! rws_insn[REG_VOLATILE].write_count)
4731 new_flags.is_write = 1;
4732 rws_access_regno (REG_VOLATILE, new_flags, pred);
4737 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4738 We cannot just fall through here since then we would be confused
4739 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
4740 usage, does not indicate a traditional asm. */
4742 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4743 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4748 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4750 rtx pat = XVECEXP (x, 0, i);
4751 if (GET_CODE (pat) == SET)
4753 update_set_flags (pat, &new_flags, &pred, &cond);
4754 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4756 else if (GET_CODE (pat) == USE
4757 || GET_CODE (pat) == CALL
4758 || GET_CODE (pat) == ASM_OPERANDS)
4759 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4760 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4763 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4765 rtx pat = XVECEXP (x, 0, i);
4766 if (GET_CODE (pat) == SET)
4768 if (GET_CODE (SET_SRC (pat)) != CALL)
4770 new_flags.is_write = 1;
4771 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4775 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4776 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4784 if (REGNO (x) == AR_UNAT_REGNUM)
4786 for (i = 0; i < 64; ++i)
4787 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4790 need_barrier = rws_access_reg (x, flags, pred);
4794 /* Find the regs used in memory address computation. */
4795 new_flags.is_write = 0;
4796 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4799 case CONST_INT: case CONST_DOUBLE:
4800 case SYMBOL_REF: case LABEL_REF: case CONST:
4803 /* Operators with side-effects. */
4804 case POST_INC: case POST_DEC:
4805 if (GET_CODE (XEXP (x, 0)) != REG)
4808 new_flags.is_write = 0;
4809 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4810 new_flags.is_write = 1;
4811 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4815 if (GET_CODE (XEXP (x, 0)) != REG)
4818 new_flags.is_write = 0;
4819 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4820 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4821 new_flags.is_write = 1;
4822 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
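/* Example (added for exposition): an auto-increment access such as
   ld8 r9 = [r10], 8 both reads and writes its base register r10,
   which is why the side-effect cases above record XEXP (x, 0) once
   with is_write = 0 and once with is_write = 1 (plus, for
   POST_MODIFY, a scan of the displacement operand XEXP (x, 1)). */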
4825 /* Handle common unary and binary ops for efficiency. */
4826 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4827 case MOD: case UDIV: case UMOD: case AND: case IOR:
4828 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4829 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4830 case NE: case EQ: case GE: case GT: case LE:
4831 case LT: case GEU: case GTU: case LEU: case LTU:
4832 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4833 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4836 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4837 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4838 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4839 case SQRT: case FFS: case POPCOUNT:
4840 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4844 switch (XINT (x, 1))
4846 case UNSPEC_LTOFF_DTPMOD:
4847 case UNSPEC_LTOFF_DTPREL:
4849 case UNSPEC_LTOFF_TPREL:
4851 case UNSPEC_PRED_REL_MUTEX:
4852 case UNSPEC_PIC_CALL:
4854 case UNSPEC_FETCHADD_ACQ:
4855 case UNSPEC_BSP_VALUE:
4856 case UNSPEC_FLUSHRS:
4857 case UNSPEC_BUNDLE_SELECTOR:
4860 case UNSPEC_GR_SPILL:
4861 case UNSPEC_GR_RESTORE:
4863 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4864 HOST_WIDE_INT bit = (offset >> 3) & 63;
4866 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4867 new_flags.is_write = (XINT (x, 1) == 1);
4868 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4873 case UNSPEC_FR_SPILL:
4874 case UNSPEC_FR_RESTORE:
4875 case UNSPEC_GETF_EXP:
4876 case UNSPEC_SETF_EXP:
4878 case UNSPEC_FR_SQRT_RECIP_APPROX:
4879 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4882 case UNSPEC_FR_RECIP_APPROX:
4883 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4884 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4887 case UNSPEC_CMPXCHG_ACQ:
4888 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4889 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4897 case UNSPEC_VOLATILE:
4898 switch (XINT (x, 1))
4901 /* Alloc must always be the first instruction of a group.
4902 We force this by always returning true. */
4903 /* ??? We might get better scheduling if we explicitly check for
4904 input/local/output register dependencies, and modify the
4905 scheduler so that alloc is always reordered to the start of
4906 the current group. We could then eliminate all of the
4907 first_instruction code. */
4908 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4910 new_flags.is_write = 1;
4911 rws_access_regno (REG_AR_CFM, new_flags, pred);
4914 case UNSPECV_SET_BSP:
4918 case UNSPECV_BLOCKAGE:
4919 case UNSPECV_INSN_GROUP_BARRIER:
4921 case UNSPECV_PSAC_ALL:
4922 case UNSPECV_PSAC_NORMAL:
4931 new_flags.is_write = 0;
4932 need_barrier = rws_access_regno (REG_RP, flags, pred);
4933 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4935 new_flags.is_write = 1;
4936 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4937 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4941 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4942 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4943 switch (format_ptr[i])
4945 case '0': /* unused field */
4946 case 'i': /* integer */
4947 case 'n': /* note */
4948 case 'w': /* wide integer */
4949 case 's': /* pointer to string */
4950 case 'S': /* optional pointer to string */
4954 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4959 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4960 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4969 return need_barrier;
4972 /* Clear out the state for group_barrier_needed_p at the start of a
4973 sequence of insns. */
4976 init_insn_group_barriers (void)
4978 memset (rws_sum, 0, sizeof (rws_sum));
4979 first_instruction = 1;
4982 /* Given the current state, recorded by previous calls to this function,
4983 determine whether a group barrier (a stop bit) is necessary before INSN.
4984 Return nonzero if so. */
4987 group_barrier_needed_p (rtx insn)
4990 int need_barrier = 0;
4991 struct reg_flags flags;
4993 memset (&flags, 0, sizeof (flags));
4994 switch (GET_CODE (insn))
5000 /* A barrier doesn't imply an instruction group boundary. */
5004 memset (rws_insn, 0, sizeof (rws_insn));
5008 flags.is_branch = 1;
5009 flags.is_sibcall = SIBLING_CALL_P (insn);
5010 memset (rws_insn, 0, sizeof (rws_insn));
5012 /* Don't bundle a call following another call. */
5013 if ((pat = prev_active_insn (insn))
5014 && GET_CODE (pat) == CALL_INSN)
5020 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5024 flags.is_branch = 1;
5026 /* Don't bundle a jump following a call. */
5027 if ((pat = prev_active_insn (insn))
5028 && GET_CODE (pat) == CALL_INSN)
5036 if (GET_CODE (PATTERN (insn)) == USE
5037 || GET_CODE (PATTERN (insn)) == CLOBBER)
5038 /* Don't care about USE and CLOBBER "insns"---those are used to
5039 indicate to the optimizer that it shouldn't get rid of
5040 certain operations. */
5043 pat = PATTERN (insn);
5045 /* Ug. Hack hacks hacked elsewhere. */
5046 switch (recog_memoized (insn))
5048 /* We play dependency tricks with the epilogue in order
5049 to get proper schedules. Undo this for dv analysis. */
5050 case CODE_FOR_epilogue_deallocate_stack:
5051 case CODE_FOR_prologue_allocate_stack:
5052 pat = XVECEXP (pat, 0, 0);
5055 /* The pattern we use for br.cloop confuses the code above.
5056 The second element of the vector is representative. */
5057 case CODE_FOR_doloop_end_internal:
5058 pat = XVECEXP (pat, 0, 1);
5061 /* Doesn't generate code. */
5062 case CODE_FOR_pred_rel_mutex:
5063 case CODE_FOR_prologue_use:
5070 memset (rws_insn, 0, sizeof (rws_insn));
5071 need_barrier = rtx_needs_barrier (pat, flags, 0);
5073 /* Check to see if the previous instruction was a volatile
5074 asm. */
5076 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5083 if (first_instruction && INSN_P (insn)
5084 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5085 && GET_CODE (PATTERN (insn)) != USE
5086 && GET_CODE (PATTERN (insn)) != CLOBBER)
5089 first_instruction = 0;
5092 return need_barrier;
5095 /* Like group_barrier_needed_p, but do not clobber the current state. */
5098 safe_group_barrier_needed_p (rtx insn)
5100 struct reg_write_state rws_saved[NUM_REGS];
5101 int saved_first_instruction;
5104 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5105 saved_first_instruction = first_instruction;
5107 t = group_barrier_needed_p (insn);
5109 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5110 first_instruction = saved_first_instruction;
5115 /* Scan the current function and insert stop bits as necessary to
5116 eliminate dependencies. This function assumes that a final
5117 instruction scheduling pass has been run which has already
5118 inserted most of the necessary stop bits. This function only
5119 inserts new ones at basic block boundaries, since these are
5120 invisible to the scheduler. */
5123 emit_insn_group_barriers (FILE *dump)
5127 int insns_since_last_label = 0;
5129 init_insn_group_barriers ();
5131 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5133 if (GET_CODE (insn) == CODE_LABEL)
5135 if (insns_since_last_label)
5137 insns_since_last_label = 0;
5139 else if (GET_CODE (insn) == NOTE
5140 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5142 if (insns_since_last_label)
5144 insns_since_last_label = 0;
5146 else if (GET_CODE (insn) == INSN
5147 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5148 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5150 init_insn_group_barriers ();
5153 else if (INSN_P (insn))
5155 insns_since_last_label = 1;
5157 if (group_barrier_needed_p (insn))
5162 fprintf (dump, "Emitting stop before label %d\n",
5163 INSN_UID (last_label));
5164 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5167 init_insn_group_barriers ();
5175 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5176 This function has to emit all necessary group barriers. */
5179 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5183 init_insn_group_barriers ();
5185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5187 if (GET_CODE (insn) == BARRIER)
5189 rtx last = prev_active_insn (insn);
5193 if (GET_CODE (last) == JUMP_INSN
5194 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5195 last = prev_active_insn (last);
5196 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5197 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5199 init_insn_group_barriers ();
5201 else if (INSN_P (insn))
5203 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5204 init_insn_group_barriers ();
5205 else if (group_barrier_needed_p (insn))
5207 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5208 init_insn_group_barriers ();
5209 group_barrier_needed_p (insn);
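/* For exposition (not part of the original source): in the emitted
   assembly a group barrier is the stop bit `;;'. For example,

       add r8 = r32, r33 ;;
       ld8 r9 = [r8]

   needs the `;;' so the load starts a new instruction group and sees
   the updated r8; gen_insn_group_barrier (GEN_INT (3)) above emits
   the insn-level form of that stop bit. */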
5216 static int errata_find_address_regs (rtx *, void *);
5217 static void errata_emit_nops (rtx);
5218 static void fixup_errata (void);
5220 /* This structure is used to track some details about the previous insn
5221 groups so we can determine if it may be necessary to insert NOPs to
5222 work around hardware errata. */
5223 static struct group
5224 {
5225 HARD_REG_SET p_reg_set;
5226 HARD_REG_SET gr_reg_conditionally_set;
5227 } last_group[2];
5229 /* Index into the last_group array. */
5230 static int group_idx;
5232 /* Called through for_each_rtx; determines if a hard register that was
5233 conditionally set in the previous group is used as an address register.
5234 It ensures that for_each_rtx returns 1 in that case. */
5236 errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5239 if (GET_CODE (x) != MEM)
5242 if (GET_CODE (x) == POST_MODIFY)
5244 if (GET_CODE (x) == REG)
5246 struct group *prev_group = last_group + (group_idx ^ 1);
5247 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5255 /* Called for each insn; this function keeps track of the state in
5256 last_group and emits additional NOPs if necessary to work around
5257 an Itanium A/B step erratum. */
5259 errata_emit_nops (rtx insn)
5261 struct group *this_group = last_group + group_idx;
5262 struct group *prev_group = last_group + (group_idx ^ 1);
5263 rtx pat = PATTERN (insn);
5264 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5265 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5266 enum attr_type type;
5269 if (GET_CODE (real_pat) == USE
5270 || GET_CODE (real_pat) == CLOBBER
5271 || GET_CODE (real_pat) == ASM_INPUT
5272 || GET_CODE (real_pat) == ADDR_VEC
5273 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5274 || asm_noperands (PATTERN (insn)) >= 0)
5277 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5278 part of it. */
5279 set = real_pat;
5280 if (GET_CODE (set) == PARALLEL)
5283 set = XVECEXP (real_pat, 0, 0);
5284 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5285 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5286 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5293 if (set && GET_CODE (set) != SET)
5296 type = get_attr_type (insn);
5299 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5300 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5302 if ((type == TYPE_M || type == TYPE_A) && cond && set
5303 && REG_P (SET_DEST (set))
5304 && GET_CODE (SET_SRC (set)) != PLUS
5305 && GET_CODE (SET_SRC (set)) != MINUS
5306 && (GET_CODE (SET_SRC (set)) != ASHIFT
5307 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5308 && (GET_CODE (SET_SRC (set)) != MEM
5309 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5310 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5312 if (!COMPARISON_P (cond)
5313 || !REG_P (XEXP (cond, 0)))
5316 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5317 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5319 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5321 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5322 emit_insn_before (gen_nop (), insn);
5323 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5325 memset (last_group, 0, sizeof last_group);
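/* Example (added for exposition): when the for_each_rtx scan above
   finds a conditionally set GR being used as an address register, the
   sequence emitted just before INSN is

       ;;        // insn group barrier
       nop
       ;;        // insn group barrier

   i.e. a nop fenced by two stop bits, separating the two insns by a
   full instruction group as the A/B step erratum requires. */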
5329 /* Emit extra nops if they are required to work around hardware errata. */
5336 if (! TARGET_B_STEP)
5340 memset (last_group, 0, sizeof last_group);
5342 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5347 if (ia64_safe_type (insn) == TYPE_S)
5350 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5353 errata_emit_nops (insn);
5358 /* Instruction scheduling support. */
5360 #define NR_BUNDLES 10
5362 /* A list of names of all available bundles. */
5364 static const char *bundle_name [NR_BUNDLES] =
5370 #if NR_BUNDLES == 10
5380 /* Nonzero if we should insert stop bits into the schedule. */
5382 int ia64_final_schedule = 0;
5384 /* Codes of the corresponding queried units: */
5386 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5387 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5389 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5390 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5392 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5394 /* The following variable value is an insn group barrier. */
5396 static rtx dfa_stop_insn;
5398 /* The following variable value is the last issued insn. */
5400 static rtx last_scheduled_insn;
5402 /* The following variable value is the size of the DFA state. */
5404 static size_t dfa_state_size;
5406 /* The following variable value is a pointer to a DFA state used as
5407 a temporary variable. */
5409 static state_t temp_dfa_state = NULL;
5411 /* The following variable value is the DFA state after issuing the last
5412 insn. */
5414 static state_t prev_cycle_state = NULL;
5416 /* The following array element values are TRUE if the corresponding
5417 insn requires stop bits to be added before it. */
5419 static char *stops_p;
5421 /* The following variable is used to set up the array mentioned above. */
5423 static int stop_before_p = 0;
5425 /* The following variable value is the length of the arrays `clocks' and
5426 `add_cycles'. */
5428 static int clocks_length;
5430 /* The following array element values are cycles on which the
5431 corresponding insn will be issued. The array is used only for
5432 Itanium1. */
5434 static int *clocks;
5436 /* The following array element values are numbers of cycles that should
5437 be added to improve insn scheduling for MM_insns for Itanium1. */
5439 static int *add_cycles;
5441 static rtx ia64_single_set (rtx);
5442 static void ia64_emit_insn_before (rtx, rtx);
5444 /* Map a bundle number to its pseudo-op. */
5447 get_bundle_name (int b)
5449 return bundle_name[b];
5453 /* Return the maximum number of instructions a cpu can issue. */
5456 ia64_issue_rate (void)
5461 /* Helper function - like single_set, but look inside COND_EXEC. */
5464 ia64_single_set (rtx insn)
5466 rtx x = PATTERN (insn), ret;
5467 if (GET_CODE (x) == COND_EXEC)
5468 x = COND_EXEC_CODE (x);
5469 if (GET_CODE (x) == SET)
5472 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5473 Although they are not classical single-set insns, the second set is there
5474 just to protect the insn from being moved past FP-relative stack accesses. */
5475 switch (recog_memoized (insn))
5477 case CODE_FOR_prologue_allocate_stack:
5478 case CODE_FOR_epilogue_deallocate_stack:
5479 ret = XVECEXP (x, 0, 0);
5483 ret = single_set_2 (insn, x);
5490 /* Adjust the cost of a scheduling dependency. Return the new cost of
5491 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5494 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
5496 enum attr_itanium_class dep_class;
5497 enum attr_itanium_class insn_class;
5499 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5502 insn_class = ia64_safe_itanium_class (insn);
5503 dep_class = ia64_safe_itanium_class (dep_insn);
5504 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5505 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5511 /* Like emit_insn_before, but skip cycle_display notes.
5512 ??? When cycle display notes are implemented, update this. */
5515 ia64_emit_insn_before (rtx insn, rtx before)
5517 emit_insn_before (insn, before);
5520 /* The following function marks insns that produce addresses for load
5521 and store insns. Such insns will be placed into M slots because this
5522 decreases latency time for Itanium1 (see function
5523 `ia64_produce_address_p' and the DFA descriptions). */
5526 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
5528 rtx insn, link, next, next_tail;
5530 next_tail = NEXT_INSN (tail);
5531 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5534 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5536 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5538 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5540 next = XEXP (link, 0);
5541 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5542 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5543 && ia64_st_address_bypass_p (insn, next))
5545 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5546 || ia64_safe_itanium_class (next)
5547 == ITANIUM_CLASS_FLD)
5548 && ia64_ld_address_bypass_p (insn, next))
5551 insn->call = link != 0;
5555 /* We're beginning a new block. Initialize data structures as necessary. */
5558 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
5559 int sched_verbose ATTRIBUTE_UNUSED,
5560 int max_ready ATTRIBUTE_UNUSED)
5562 #ifdef ENABLE_CHECKING
5565 if (reload_completed)
5566 for (insn = NEXT_INSN (current_sched_info->prev_head);
5567 insn != current_sched_info->next_tail;
5568 insn = NEXT_INSN (insn))
5569 if (SCHED_GROUP_P (insn))
5572 last_scheduled_insn = NULL_RTX;
5573 init_insn_group_barriers ();
5576 /* We are about to begin issuing insns for this clock cycle.
5577 Override the default sort algorithm to better slot instructions. */
5580 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
5581 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
5585 int n_ready = *pn_ready;
5586 rtx *e_ready = ready + n_ready;
5590 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5592 if (reorder_type == 0)
5594 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5596 for (insnp = ready; insnp < e_ready; insnp++)
5597 if (insnp < e_ready)
5600 enum attr_type t = ia64_safe_type (insn);
5601 if (t == TYPE_UNKNOWN)
5603 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5604 || asm_noperands (PATTERN (insn)) >= 0)
5606 rtx lowest = ready[n_asms];
5607 ready[n_asms] = insn;
5613 rtx highest = ready[n_ready - 1];
5614 ready[n_ready - 1] = insn;
5621 if (n_asms < n_ready)
5623 /* Some normal insns to process. Skip the asms. */
5627 else if (n_ready > 0)
5631 if (ia64_final_schedule)
5634 int nr_need_stop = 0;
5636 for (insnp = ready; insnp < e_ready; insnp++)
5637 if (safe_group_barrier_needed_p (*insnp))
5640 if (reorder_type == 1 && n_ready == nr_need_stop)
5642 if (reorder_type == 0)
5645 /* Move down everything that needs a stop bit, preserving
5646 the relative order. */
5647 while (insnp-- > ready + deleted)
5648 while (insnp >= ready + deleted)
5651 if (! safe_group_barrier_needed_p (insn))
5653 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
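/* Example (added for exposition): if ready holds {A, B, C}, issued
   from the high end of the array (cf. ready[n_ready - 1] above), and
   only B needs a stop bit, the memmove shifts the entries in front of
   B up by one slot so that B can be stored at ready[0]; insns needing
   a stop bit thus migrate toward the late-issue end of the queue with
   their relative order preserved. */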
5664 /* We are about to begin issuing insns for this clock cycle. Override
5665 the default sort algorithm to better slot instructions. */
5668 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
5671 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5672 pn_ready, clock_var, 0);
5675 /* Like ia64_sched_reorder, but called after issuing each insn.
5676 Override the default sort algorithm to better slot instructions. */
5679 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
5680 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
5681 int *pn_ready, int clock_var)
5683 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5684 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5685 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5689 /* We are about to issue INSN. Return the number of insns left on the
5690 ready queue that can be issued this cycle. */
5693 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
5694 int sched_verbose ATTRIBUTE_UNUSED,
5695 rtx insn ATTRIBUTE_UNUSED,
5696 int can_issue_more ATTRIBUTE_UNUSED)
5698 last_scheduled_insn = insn;
5699 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5700 if (reload_completed)
5702 if (group_barrier_needed_p (insn))
5704 if (GET_CODE (insn) == CALL_INSN)
5705 init_insn_group_barriers ();
5706 stops_p [INSN_UID (insn)] = stop_before_p;
5712 /* We are choosing an insn from the ready queue. Return nonzero if INSN
5713 can be chosen. */
5716 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
5718 if (insn == NULL_RTX || !INSN_P (insn))
5720 return (!reload_completed
5721 || !safe_group_barrier_needed_p (insn));
5724 /* The following variable value is a pseudo-insn used by the DFA insn
5725 scheduler to change the DFA state when the simulated clock is
5726 increased. */
5728 static rtx dfa_pre_cycle_insn;
5730 /* We are about to begin issuing INSN. Return nonzero if we cannot
5731 issue it on the given cycle CLOCK and return zero if we should not sort
5732 the ready queue on the next clock start. */
5735 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
5736 int clock, int *sort_p)
5738 int setup_clocks_p = FALSE;
5740 if (insn == NULL_RTX || !INSN_P (insn))
5742 if ((reload_completed && safe_group_barrier_needed_p (insn))
5743 || (last_scheduled_insn
5744 && (GET_CODE (last_scheduled_insn) == CALL_INSN
5745 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5746 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
5748 init_insn_group_barriers ();
5749 if (verbose && dump)
5750 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
5751 last_clock == clock ? " + cycle advance" : "");
5753 if (last_clock == clock)
5755 state_transition (curr_state, dfa_stop_insn);
5756 if (TARGET_EARLY_STOP_BITS)
5757 *sort_p = (last_scheduled_insn == NULL_RTX
5758 || GET_CODE (last_scheduled_insn) != CALL_INSN);
5763 else if (reload_completed)
5764 setup_clocks_p = TRUE;
5765 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
5766 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
5767 state_reset (curr_state);
5770 memcpy (curr_state, prev_cycle_state, dfa_state_size);
5771 state_transition (curr_state, dfa_stop_insn);
5772 state_transition (curr_state, dfa_pre_cycle_insn);
5773 state_transition (curr_state, NULL);
5776 else if (reload_completed)
5777 setup_clocks_p = TRUE;
5778 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
5779 && GET_CODE (PATTERN (insn)) != ASM_INPUT
5780 && asm_noperands (PATTERN (insn)) < 0)
5782 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
5784 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
5789 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5790 if (REG_NOTE_KIND (link) == 0)
5792 enum attr_itanium_class dep_class;
5793 rtx dep_insn = XEXP (link, 0);
5795 dep_class = ia64_safe_itanium_class (dep_insn);
5796 if ((dep_class == ITANIUM_CLASS_MMMUL
5797 || dep_class == ITANIUM_CLASS_MMSHF)
5798 && last_clock - clocks [INSN_UID (dep_insn)] < 4
5800 || last_clock - clocks [INSN_UID (dep_insn)] < d))
5801 d = last_clock - clocks [INSN_UID (dep_insn)];
5804 add_cycles [INSN_UID (insn)] = 3 - d;
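/* Worked example (added for exposition): if INSN consumes an MMSHF
   result produced one cycle earlier, d = last_clock - clocks[...] = 1,
   so add_cycles[INSN_UID (insn)] = 3 - 1 = 2 extra cycles are
   recorded, pushing the producer/consumer distance toward the 4
   cycles the Itanium1 MM units prefer. */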
5812 /* The following page contains abstract data `bundle states' which are
5813 used for bundling insns (inserting nops and template generation). */
5815 /* The following describes the state of insn bundling. */
5817 struct bundle_state
5818 {
5819 /* Unique bundle state number to identify them in the debugging
5820 output. */
5821 int unique_num;
5822 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
5823 /* number of nops before and after the insn */
5824 short before_nops_num, after_nops_num;
5825 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
5826 insn) */
5827 int cost; /* cost of the state in cycles */
5828 int accumulated_insns_num; /* number of all previous insns including
5829 nops; an L insn is counted as 2 insns */
5830 int branch_deviation; /* deviation of previous branches from 3rd slots */
5831 struct bundle_state *next; /* next state with the same insn_num */
5832 struct bundle_state *originator; /* originator (previous insn state) */
5833 /* All bundle states are in the following chain. */
5834 struct bundle_state *allocated_states_chain;
5835 /* The DFA state after issuing the insn and the nops. */
5836 state_t dfa_state;
5837 };
5839 /* The following maps insn number to the corresponding bundle state. */
5841 static struct bundle_state **index_to_bundle_states;
5843 /* The unique number of the next bundle state. */
5845 static int bundle_states_num;
5847 /* All allocated bundle states are in the following chain. */
5849 static struct bundle_state *allocated_bundle_states_chain;
5851 /* All allocated but not used bundle states are in the following
5852 chain. */
5854 static struct bundle_state *free_bundle_state_chain;
5857 /* The following function returns a free bundle state. */
5859 static struct bundle_state *
5860 get_free_bundle_state (void)
5862 struct bundle_state *result;
5864 if (free_bundle_state_chain != NULL)
5866 result = free_bundle_state_chain;
5867 free_bundle_state_chain = result->next;
5871 result = xmalloc (sizeof (struct bundle_state));
5872 result->dfa_state = xmalloc (dfa_state_size);
5873 result->allocated_states_chain = allocated_bundle_states_chain;
5874 allocated_bundle_states_chain = result;
5876 result->unique_num = bundle_states_num++;
5881 /* The following function frees the given bundle state. */
5884 free_bundle_state (struct bundle_state *state)
5886 state->next = free_bundle_state_chain;
5887 free_bundle_state_chain = state;
5890 /* Start work with abstract data `bundle states'. */
5893 initiate_bundle_states (void)
5895 bundle_states_num = 0;
5896 free_bundle_state_chain = NULL;
5897 allocated_bundle_states_chain = NULL;
5900 /* Finish work with abstract data `bundle states'. */
5903 finish_bundle_states (void)
5905 struct bundle_state *curr_state, *next_state;
5907 for (curr_state = allocated_bundle_states_chain;
5909 curr_state = next_state)
5911 next_state = curr_state->allocated_states_chain;
5912 free (curr_state->dfa_state);
5917 /* Hash table of the bundle states. The key is dfa_state and insn_num
5918 of the bundle states. */
5920 static htab_t bundle_state_table;
5922 /* The function returns the hash of BUNDLE_STATE. */
5925 bundle_state_hash (const void *bundle_state)
5927 const struct bundle_state *state = (struct bundle_state *) bundle_state;
5930 for (result = i = 0; i < dfa_state_size; i++)
5931 result += (((unsigned char *) state->dfa_state) [i]
5932 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
5933 return result + state->insn_num;
5936 /* The function returns nonzero if the bundle state keys are equal. */
5939 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
5941 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
5942 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
5944 return (state1->insn_num == state2->insn_num
5945 && memcmp (state1->dfa_state, state2->dfa_state,
5946 dfa_state_size) == 0);
5949 /* The function inserts the BUNDLE_STATE into the hash table. The
5950 function returns nonzero if the bundle has been inserted into the
5951 table. The table contains the best bundle state with the given key. */
5954 insert_bundle_state (struct bundle_state *bundle_state)
5958 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
5959 if (*entry_ptr == NULL)
5961 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
5962 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
5963 *entry_ptr = (void *) bundle_state;
5966 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
5967 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
5968 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
5969 > bundle_state->accumulated_insns_num
5970 || (((struct bundle_state *)
5971 *entry_ptr)->accumulated_insns_num
5972 == bundle_state->accumulated_insns_num
5973 && ((struct bundle_state *)
5974 *entry_ptr)->branch_deviation
5975 > bundle_state->branch_deviation))))
5978 struct bundle_state temp;
5980 temp = *(struct bundle_state *) *entry_ptr;
5981 *(struct bundle_state *) *entry_ptr = *bundle_state;
5982 ((struct bundle_state *) *entry_ptr)->next = temp.next;
5983 *bundle_state = temp;
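/* Example (added for exposition) of the ordering used above: a new
   state with (cost, accumulated_insns_num, branch_deviation) =
   (2, 5, 0) replaces a stored state with (2, 6, 0) for the same
   DFA-state/insn_num key, because equal cost is broken by fewer
   accumulated insns, and ties on that by smaller branch deviation. */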
5988 /* Start work with the hash table. */
5991 initiate_bundle_state_table (void)
5993 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
5997 /* Finish work with the hash table. */
6000 finish_bundle_state_table (void)
6002 htab_delete (bundle_state_table);
6007 /* The following variable is an insn `nop' used to check bundle states
6008 with different numbers of inserted nops. */
6010 static rtx ia64_nop;
6012 /* The following function tries to issue NOPS_NUM nops for the current
6013 state without advancing the processor cycle. If this fails, the
6014 function returns FALSE and frees the current state. */
6017 try_issue_nops (struct bundle_state *curr_state, int nops_num)
6021 for (i = 0; i < nops_num; i++)
6022 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6024 free_bundle_state (curr_state);
6030 /* The following function tries to issue INSN for the current
6031 state without advancing the processor cycle. If this fails, the
6032 function returns FALSE and frees the current state. */
6035 try_issue_insn (struct bundle_state *curr_state, rtx insn)
6037 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6039 free_bundle_state (curr_state);
6045 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6046 starting with ORIGINATOR without advancing the processor cycle. If
6047 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6048 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6049 If successful, the function creates a new bundle state and
6050 inserts it into the hash table and into `index_to_bundle_states'. */
6053 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6054 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6056 struct bundle_state *curr_state;
6058 curr_state = get_free_bundle_state ();
6059 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6060 curr_state->insn = insn;
6061 curr_state->insn_num = originator->insn_num + 1;
6062 curr_state->cost = originator->cost;
6063 curr_state->originator = originator;
6064 curr_state->before_nops_num = before_nops_num;
6065 curr_state->after_nops_num = 0;
6066 curr_state->accumulated_insns_num
6067 = originator->accumulated_insns_num + before_nops_num;
6068 curr_state->branch_deviation = originator->branch_deviation;
6069 if (insn == NULL_RTX)
6071 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6073 if (GET_MODE (insn) == TImode)
6075 if (!try_issue_nops (curr_state, before_nops_num))
6077 if (!try_issue_insn (curr_state, insn))
6079 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6080 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6081 && curr_state->accumulated_insns_num % 3 != 0)
6083 free_bundle_state (curr_state);
6087 else if (GET_MODE (insn) != TImode)
6089 if (!try_issue_nops (curr_state, before_nops_num))
6091 if (!try_issue_insn (curr_state, insn))
6093 curr_state->accumulated_insns_num++;
6094 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6095 || asm_noperands (PATTERN (insn)) >= 0)
6097 if (ia64_safe_type (insn) == TYPE_L)
6098 curr_state->accumulated_insns_num++;
6102 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6103 state_transition (curr_state->dfa_state, NULL);
6105 if (!try_issue_nops (curr_state, before_nops_num))
6107 if (!try_issue_insn (curr_state, insn))
6109 curr_state->accumulated_insns_num++;
6110 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6111 || asm_noperands (PATTERN (insn)) >= 0)
6113 /* Finish bundle containing asm insn. */
6114 curr_state->after_nops_num
6115 = 3 - curr_state->accumulated_insns_num % 3;
6116 curr_state->accumulated_insns_num
6117 += 3 - curr_state->accumulated_insns_num % 3;
6119 else if (ia64_safe_type (insn) == TYPE_L)
6120 curr_state->accumulated_insns_num++;
6122 if (ia64_safe_type (insn) == TYPE_B)
6123 curr_state->branch_deviation
6124 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6125 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6127 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6130 struct bundle_state *curr_state1;
6131 struct bundle_state *allocated_states_chain;
6133 curr_state1 = get_free_bundle_state ();
6134 dfa_state = curr_state1->dfa_state;
6135 allocated_states_chain = curr_state1->allocated_states_chain;
6136 *curr_state1 = *curr_state;
6137 curr_state1->dfa_state = dfa_state;
6138 curr_state1->allocated_states_chain = allocated_states_chain;
6139 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6141 curr_state = curr_state1;
6143 if (!try_issue_nops (curr_state,
6144 3 - curr_state->accumulated_insns_num % 3))
6146 curr_state->after_nops_num
6147 = 3 - curr_state->accumulated_insns_num % 3;
6148 curr_state->accumulated_insns_num
6149 += 3 - curr_state->accumulated_insns_num % 3;
6151 if (!insert_bundle_state (curr_state))
6152 free_bundle_state (curr_state);
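/* Example (added for exposition): with accumulated_insns_num == 4 the
   code above computes 3 - 4 % 3 == 2, so it tries to issue two
   trailing nops, rounding the count up to 6 and thereby closing the
   current (second) bundle; the padded state survives only if the nops
   fit the DFA state and insert_bundle_state accepts it. */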
6156 /* The following function returns the position in the two-bundle window
6157 for the given STATE. */
6159 static int
6160 get_max_pos (state_t state)
6162 if (cpu_unit_reservation_p (state, pos_6))
6164 else if (cpu_unit_reservation_p (state, pos_5))
6166 else if (cpu_unit_reservation_p (state, pos_4))
6168 else if (cpu_unit_reservation_p (state, pos_3))
6170 else if (cpu_unit_reservation_p (state, pos_2))
6172 else if (cpu_unit_reservation_p (state, pos_1))
6178 /* The function returns the code of a possible template for the given
6179 position and state. The function should be called only with two values
6180 of position, 3 or 6. */
6183 get_template (state_t state, int pos)
6188 if (cpu_unit_reservation_p (state, _0mii_))
6190 else if (cpu_unit_reservation_p (state, _0mmi_))
6192 else if (cpu_unit_reservation_p (state, _0mfi_))
6194 else if (cpu_unit_reservation_p (state, _0mmf_))
6196 else if (cpu_unit_reservation_p (state, _0bbb_))
6198 else if (cpu_unit_reservation_p (state, _0mbb_))
6200 else if (cpu_unit_reservation_p (state, _0mib_))
6202 else if (cpu_unit_reservation_p (state, _0mmb_))
6204 else if (cpu_unit_reservation_p (state, _0mfb_))
6206 else if (cpu_unit_reservation_p (state, _0mlx_))
6211 if (cpu_unit_reservation_p (state, _1mii_))
6213 else if (cpu_unit_reservation_p (state, _1mmi_))
6215 else if (cpu_unit_reservation_p (state, _1mfi_))
6217 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6219 else if (cpu_unit_reservation_p (state, _1bbb_))
6221 else if (cpu_unit_reservation_p (state, _1mbb_))
6223 else if (cpu_unit_reservation_p (state, _1mib_))
6225 else if (cpu_unit_reservation_p (state, _1mmb_))
6227 else if (cpu_unit_reservation_p (state, _1mfb_))
6229 else if (cpu_unit_reservation_p (state, _1mlx_))
6238 /* The following function returns the first insn important for insn
6239 bundling, searching from INSN up to (but not including) TAIL. */
6242 get_next_important_insn (rtx insn, rtx tail)
6244 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6246 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6247 && GET_CODE (PATTERN (insn)) != USE
6248 && GET_CODE (PATTERN (insn)) != CLOBBER)
6253 /* The following function does insn bundling. Bundling means
6254 inserting templates and nop insns to fit insn groups into permitted
6255 templates. Instruction scheduling uses an NDFA (non-deterministic
6256 finite automaton) encoding information about the templates and the
6257 inserted nops. The nondeterminism of the automaton permits following
6258 all possible insn sequences very quickly.
6260 Unfortunately it is not possible to get information about inserted
6261 nop insns and used templates from the automaton states. The
6262 automaton only says that we can issue an insn, possibly inserting
6263 some nops before it and using some template. Therefore insn
6264 bundling in this function is implemented by using a DFA
6265 (deterministic finite automaton). We follow all possible insn
6266 sequences by inserting 0-2 nops (that is what the NDFA describes for
6267 insn scheduling) before/after each insn being bundled. We know the
6268 start of a simulated processor cycle from insn scheduling (an insn
6269 starting a new cycle has TImode).
6271 A simple implementation of insn bundling would create an enormous
6272 number of possible insn sequences satisfying the information about new
6273 cycle ticks taken from the insn scheduling. To make the algorithm
6274 practical we use dynamic programming. Each decision (about
6275 inserting nops and implicitly about previous decisions) is described
6276 by structure bundle_state (see above). If we generate the same
6277 bundle state (the key is the automaton state after issuing the insns
6278 and nops for it), we reuse the already generated one. As a consequence
6279 we reject some decisions which cannot improve the solution and
6280 reduce memory used by the algorithm.
6282 When we reach the end of an EBB (extended basic block), we choose the
6283 best sequence and then, moving back through the EBB, insert templates
6284 for the best alternative. The templates are found by querying the
6285 automaton state for each insn in the chosen bundle states.
6287 So the algorithm makes two (forward and backward) passes through the
6288 EBB. There is an additional forward pass through the EBB for the
6289 Itanium1 processor. This pass inserts more nops to make the dependency
6290 between a producer insn and MMMUL/MMSHF at least 4 cycles long. */
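/* Illustrative sketch (added for exposition; not from the original
   source) of the dynamic programming described above:

       for each important insn I, left to right:
         for each surviving bundle state S at I's position:
           for nops in {2, 1, 0}:
             issue_nops_and_insn (S, nops, I, ...);  // keep best per key

   then walk back from the best final state, reading templates and nop
   counts off the chosen states. */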
6293 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6295 struct bundle_state *curr_state, *next_state, *best_state;
6296 rtx insn, next_insn;
6298 int i, bundle_end_p, only_bundle_end_p, asm_p;
6299 int pos = 0, max_pos, template0, template1;
6302 enum attr_type type;
6305 /* Count insns in the EBB. */
6306 for (insn = NEXT_INSN (prev_head_insn);
6307 insn && insn != tail;
6308 insn = NEXT_INSN (insn))
6314 dfa_clean_insn_cache ();
6315 initiate_bundle_state_table ();
6316 index_to_bundle_states = xmalloc ((insn_num + 2)
6317 * sizeof (struct bundle_state *));
6318 /* First (forward) pass -- generation of bundle states. */
6319 curr_state = get_free_bundle_state ();
6320 curr_state->insn = NULL;
6321 curr_state->before_nops_num = 0;
6322 curr_state->after_nops_num = 0;
6323 curr_state->insn_num = 0;
6324 curr_state->cost = 0;
6325 curr_state->accumulated_insns_num = 0;
6326 curr_state->branch_deviation = 0;
6327 curr_state->next = NULL;
6328 curr_state->originator = NULL;
6329 state_reset (curr_state->dfa_state);
6330 index_to_bundle_states [0] = curr_state;
6332 /* Shift the cycle mark if it is put on an insn which could be ignored. */
6333 for (insn = NEXT_INSN (prev_head_insn);
6335 insn = NEXT_INSN (insn))
6337 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6338 || GET_CODE (PATTERN (insn)) == USE
6339 || GET_CODE (PATTERN (insn)) == CLOBBER)
6340 && GET_MODE (insn) == TImode)
6342 PUT_MODE (insn, VOIDmode);
6343 for (next_insn = NEXT_INSN (insn);
6345 next_insn = NEXT_INSN (next_insn))
6346 if (INSN_P (next_insn)
6347 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6348 && GET_CODE (PATTERN (next_insn)) != USE
6349 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6351 PUT_MODE (next_insn, TImode);
6355 /* Forward pass: generation of bundle states. */
6356 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6361 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6362 || GET_CODE (PATTERN (insn)) == USE
6363 || GET_CODE (PATTERN (insn)) == CLOBBER)
6365 type = ia64_safe_type (insn);
6366 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6368 index_to_bundle_states [insn_num] = NULL;
6369 for (curr_state = index_to_bundle_states [insn_num - 1];
6371 curr_state = next_state)
6373 pos = curr_state->accumulated_insns_num % 3;
6374 next_state = curr_state->next;
6375 /* We must fill up the current bundle in order to start a
6376 subsequent asm insn in a new bundle. An asm insn is always
6377 placed in a separate bundle. */
6379 = (next_insn != NULL_RTX
6380 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6381 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6382 /* We may fill up the current bundle if it is at the cycle end
6383 without a group barrier. */
6385 = (only_bundle_end_p || next_insn == NULL_RTX
6386 || (GET_MODE (next_insn) == TImode
6387 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6388 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6390 /* We need to insert 2 nops for cases like M_MII. To
6391 guarantee issuing all insns on the same cycle for
6392 Itanium 1, we need to issue 2 nops after the first M
6393 insn (MnnMII where n is a nop insn). */
6394 || ((type == TYPE_M || type == TYPE_A)
6395 && ia64_tune == PROCESSOR_ITANIUM
6396 && !bundle_end_p && pos == 1))
6397 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6399 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6401 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6404 if (index_to_bundle_states [insn_num] == NULL)
6405 abort ();
6406 for (curr_state = index_to_bundle_states [insn_num];
6408 curr_state = curr_state->next)
6409 if (verbose >= 2 && dump)
6411 /* This structure is taken from the generated code of the
6412 pipeline hazard recognizer (see file insn-attrtab.c).
6413 Please don't forget to change the structure if a new
6414 automaton is added to the .md file. */
6417 unsigned short one_automaton_state;
6418 unsigned short oneb_automaton_state;
6419 unsigned short two_automaton_state;
6420 unsigned short twob_automaton_state;
6425 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6426 curr_state->unique_num,
6427 (curr_state->originator == NULL
6428 ? -1 : curr_state->originator->unique_num),
6430 curr_state->before_nops_num, curr_state->after_nops_num,
6431 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6432 (ia64_tune == PROCESSOR_ITANIUM
6433 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6434 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6438 if (index_to_bundle_states [insn_num] == NULL)
6439 /* We should find a solution because the 2nd insn scheduling has found one. */
6441 abort ();
6442 /* Find a state corresponding to the best insn sequence. */
6444 for (curr_state = index_to_bundle_states [insn_num];
6446 curr_state = curr_state->next)
6447 /* We are only looking at states with a fully filled-up last
6448 bundle. First we prefer insn sequences with minimal cost,
6449 then those with the fewest inserted nops, and finally those with
6450 branch insns placed in the 3rd slots. */
6451 if (curr_state->accumulated_insns_num % 3 == 0
6452 && (best_state == NULL || best_state->cost > curr_state->cost
6453 || (best_state->cost == curr_state->cost
6454 && (curr_state->accumulated_insns_num
6455 < best_state->accumulated_insns_num
6456 || (curr_state->accumulated_insns_num
6457 == best_state->accumulated_insns_num
6458 && curr_state->branch_deviation
6459 < best_state->branch_deviation)))))
6460 best_state = curr_state;
6461 /* Second (backward) pass: adding nops and templates. */
6462 insn_num = best_state->before_nops_num;
6463 template0 = template1 = -1;
6464 for (curr_state = best_state;
6465 curr_state->originator != NULL;
6466 curr_state = curr_state->originator)
6468 insn = curr_state->insn;
6469 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6470 || asm_noperands (PATTERN (insn)) >= 0);
6472 if (verbose >= 2 && dump)
6476 unsigned short one_automaton_state;
6477 unsigned short oneb_automaton_state;
6478 unsigned short two_automaton_state;
6479 unsigned short twob_automaton_state;
6484 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6485 curr_state->unique_num,
6486 (curr_state->originator == NULL
6487 ? -1 : curr_state->originator->unique_num),
6489 curr_state->before_nops_num, curr_state->after_nops_num,
6490 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6491 (ia64_tune == PROCESSOR_ITANIUM
6492 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6493 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6496 /* Find the position in the current bundle window. The window can
6497 contain at most two bundles. A two-bundle window means that
6498 the processor will make two bundle rotations. */
6499 max_pos = get_max_pos (curr_state->dfa_state);
6501 /* The following (negative template number) means that the
6502 processor did one bundle rotation. */
6503 || (max_pos == 3 && template0 < 0))
6505 /* We are at the end of the window -- find template(s) for its bundle(s). */
6509 template0 = get_template (curr_state->dfa_state, 3);
6512 template1 = get_template (curr_state->dfa_state, 3);
6513 template0 = get_template (curr_state->dfa_state, 6);
6516 if (max_pos > 3 && template1 < 0)
6517 /* This may happen when we have a stop inside a bundle. */
6521 template1 = get_template (curr_state->dfa_state, 3);
6525 /* Emit nops after the current insn. */
6526 for (i = 0; i < curr_state->after_nops_num; i++)
6529 emit_insn_after (nop, insn);
6535 /* We are at the start of a bundle: emit the template
6536 (it should be defined). */
6539 b = gen_bundle_selector (GEN_INT (template0));
6540 ia64_emit_insn_before (b, nop);
6541 /* If we have a two-bundle window, we make one bundle
6542 rotation. Otherwise template0 will be undefined
6543 (a negative value). */
6544 template0 = template1;
6548 /* Move the position backward in the window. A group barrier has
6549 no slot. An asm insn takes a whole bundle. */
6550 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6551 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6552 && asm_noperands (PATTERN (insn)) < 0)
6554 /* A long (L) insn takes 2 slots. */
6555 if (ia64_safe_type (insn) == TYPE_L)
6560 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6561 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6562 && asm_noperands (PATTERN (insn)) < 0)
6564 /* The current insn is at the bundle start: emit the template. */
6568 b = gen_bundle_selector (GEN_INT (template0));
6569 ia64_emit_insn_before (b, insn);
6570 b = PREV_INSN (insn);
6572 /* See the comment above in the analogous place for emitting nops after the insn. */
6574 template0 = template1;
6577 /* Emit nops before the current insn. */
6578 for (i = 0; i < curr_state->before_nops_num; i++)
6581 ia64_emit_insn_before (nop, insn);
6582 nop = PREV_INSN (insn);
6589 /* See the comment above in the analogous place for emitting nops after the insn. */
6593 b = gen_bundle_selector (GEN_INT (template0));
6594 ia64_emit_insn_before (b, insn);
6595 b = PREV_INSN (insn);
6597 template0 = template1;
6602 if (ia64_tune == PROCESSOR_ITANIUM)
6603 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6604 Itanium 1 has a strange design: if the distance between an insn
6605 and a dependent MM-insn is less than 4 cycles, we incur an
6606 additional 6-cycle stall. So we make the distance equal to 4 cycles if it is less. */
6608 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6613 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6614 || GET_CODE (PATTERN (insn)) == USE
6615 || GET_CODE (PATTERN (insn)) == CLOBBER)
6617 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6618 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6619 /* We found an MM-insn which needs additional cycles. */
6625 /* Now we search for the template of the bundle in
6626 which the MM-insn is placed and for the position of the
6627 insn in the bundle (0, 1, 2). We also check whether
6628 there is a stop before the insn. */
6629 last = prev_active_insn (insn);
6630 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6632 last = prev_active_insn (last);
6634 for (;; last = prev_active_insn (last))
6635 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6637 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6639 /* The insn is in an MLX bundle. Change the template
6640 to MFI because we will add nops before the
6641 insn. This simplifies the subsequent code a lot. */
6643 = gen_bundle_selector (const2_rtx); /* -> MFI */
6646 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
6647 && (ia64_safe_itanium_class (last)
6648 != ITANIUM_CLASS_IGNORE))
6650 /* A correctness check: the stop is not at the
6651 bundle start, there are no more than 3 insns in the bundle,
6652 and the MM-insn is not at the start of a bundle with template MLX. */
6654 if ((pred_stop_p && n == 0) || n > 2
6655 || (template0 == 9 && n != 0))
6656 abort ();
6657 /* Put nops after the insn in the bundle. */
6658 for (j = 3 - n; j > 0; j --)
6659 ia64_emit_insn_before (gen_nop (), insn);
6660 /* This takes into account that we will add N more nops
6661 before the insn later -- please see the code below. */
6662 add_cycles [INSN_UID (insn)]--;
6663 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6664 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6667 add_cycles [INSN_UID (insn)]--;
6668 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6670 /* Insert "MII;" template. */
6671 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
6673 ia64_emit_insn_before (gen_nop (), insn);
6674 ia64_emit_insn_before (gen_nop (), insn);
6677 /* To decrease code size, we use the "MI;I;" template. */
6679 ia64_emit_insn_before
6680 (gen_insn_group_barrier (GEN_INT (3)), insn);
6683 ia64_emit_insn_before (gen_nop (), insn);
6684 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6687 /* Put the MM-insn in the same slot of a bundle with the
6688 same template as the original one. */
6689 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6691 /* To put the insn in the same slot, add the necessary number of nops. */
6693 for (j = n; j > 0; j --)
6694 ia64_emit_insn_before (gen_nop (), insn);
6695 /* Emit the stop if the original bundle had one. */
6697 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6701 free (index_to_bundle_states);
6702 finish_bundle_state_table ();
6704 dfa_clean_insn_cache ();
6707 /* The following function is called at the end of scheduling a BB or
6708 an EBB. After reload, it inserts stop bits and does insn bundling. */
6711 ia64_sched_finish (FILE *dump, int sched_verbose)
6714 fprintf (dump, "// Finishing schedule.\n");
6715 if (!reload_completed)
6716 return;
6717 if (reload_completed)
6719 final_emit_insn_group_barriers (dump);
6720 bundling (dump, sched_verbose, current_sched_info->prev_head,
6721 current_sched_info->next_tail);
6722 if (sched_verbose && dump)
6723 fprintf (dump, "// finishing %d-%d\n",
6724 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6725 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6731 /* The following function inserts stop bits in a scheduled BB or EBB. */
6734 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6737 int need_barrier_p = 0;
6738 rtx prev_insn = NULL_RTX;
6740 init_insn_group_barriers ();
6742 for (insn = NEXT_INSN (current_sched_info->prev_head);
6743 insn != current_sched_info->next_tail;
6744 insn = NEXT_INSN (insn))
6746 if (GET_CODE (insn) == BARRIER)
6748 rtx last = prev_active_insn (insn);
6752 if (GET_CODE (last) == JUMP_INSN
6753 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6754 last = prev_active_insn (last);
6755 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6756 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6758 init_insn_group_barriers ();
6760 prev_insn = NULL_RTX;
6762 else if (INSN_P (insn))
6764 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6766 init_insn_group_barriers ();
6768 prev_insn = NULL_RTX;
6770 else if (need_barrier_p || group_barrier_needed_p (insn))
6772 if (TARGET_EARLY_STOP_BITS)
6777 last != current_sched_info->prev_head;
6778 last = PREV_INSN (last))
6779 if (INSN_P (last) && GET_MODE (last) == TImode
6780 && stops_p [INSN_UID (last)])
6782 if (last == current_sched_info->prev_head)
6784 last = prev_active_insn (last);
6786 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6787 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6789 init_insn_group_barriers ();
6790 for (last = NEXT_INSN (last);
6792 last = NEXT_INSN (last))
6794 group_barrier_needed_p (last);
6798 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6800 init_insn_group_barriers ();
6802 group_barrier_needed_p (insn);
6803 prev_insn = NULL_RTX;
6805 else if (recog_memoized (insn) >= 0)
6807 need_barrier_p = (GET_CODE (insn) == CALL_INSN
6808 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6809 || asm_noperands (PATTERN (insn)) >= 0);
6816 /* The following function returns the lookahead depth used by the DFA insn scheduler. */
6820 ia64_first_cycle_multipass_dfa_lookahead (void)
6822 return (reload_completed ? 6 : 4);
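/* A note on the values above (an inference, not from the original
   comments): after reload the scheduler runs just before bundling, and
   a lookahead of 6 corresponds to the two-bundle (2 x 3 slot) issue
   window assumed by the bundling pass; before reload a smaller
   lookahead of 4 keeps the multipass scheduling cheaper.  */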
6825 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
6828 ia64_init_dfa_pre_cycle_insn (void)
6830 if (temp_dfa_state == NULL)
6832 dfa_state_size = state_size ();
6833 temp_dfa_state = xmalloc (dfa_state_size);
6834 prev_cycle_state = xmalloc (dfa_state_size);
6836 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
6837 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
6838 recog_memoized (dfa_pre_cycle_insn);
6839 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6840 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
6841 recog_memoized (dfa_stop_insn);
6844 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6845 used by the DFA insn scheduler. */
6848 ia64_dfa_pre_cycle_insn (void)
6850 return dfa_pre_cycle_insn;
6853 /* The following function returns TRUE if PRODUCER (of type ilog or
6854 ld) produces an address for CONSUMER (of type st or stf). */
6857 ia64_st_address_bypass_p (rtx producer, rtx consumer)
6861 if (producer == NULL_RTX || consumer == NULL_RTX)
6863 dest = ia64_single_set (producer);
6864 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6865 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6867 if (GET_CODE (reg) == SUBREG)
6868 reg = SUBREG_REG (reg);
6869 dest = ia64_single_set (consumer);
6870 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
6871 || GET_CODE (mem) != MEM)
6873 return reg_mentioned_p (reg, mem);
6876 /* The following function returns TRUE if PRODUCER (of type ilog or
6877 ld) produces an address for CONSUMER (of type ld or fld). */
6880 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
6882 rtx dest, src, reg, mem;
6884 if (producer == NULL_RTX || consumer == NULL_RTX)
6886 dest = ia64_single_set (producer);
6887 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
6888 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
6890 if (GET_CODE (reg) == SUBREG)
6891 reg = SUBREG_REG (reg);
6892 src = ia64_single_set (consumer);
6893 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
6895 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
6896 mem = XVECEXP (mem, 0, 0);
6897 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
6898 mem = XEXP (mem, 0);
6900 /* Note that LO_SUM is used for GOT loads. */
6901 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
6904 return reg_mentioned_p (reg, mem);
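/* An illustrative example (register numbers made up, not from the
   original sources): for the pair

     add r14 = r32, r33    // producer, itanium_class ilog
     ld8 r4 = [r14]        // consumer, itanium_class ld

   the producer sets the register used in the consumer's address, so
   ia64_ld_address_bypass_p returns TRUE and the .md bypasses can give
   this dependence its special latency.  */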
6907 /* The following function returns TRUE if INSN produces an address for a
6908 load/store insn. We will place such insns into an M slot because
6909 that decreases their latency. */
6912 ia64_produce_address_p (rtx insn)
6918 /* Emit pseudo-ops for the assembler to describe predicate relations.
6919 At present this assumes that we only consider predicate pairs to
6920 be mutex, and that the assembler can deduce proper values from
6921 straight-line code. */
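/* For example (an illustrative sketch assuming GNU as syntax): after a
   label, for each mutex predicate pair that is live there, the loop
   below emits a pseudo-op along the lines of

     .pred.rel "mutex", p6, p7

   so the assembler knows p6 and p7 cannot both be true at that point.  */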
6924 emit_predicate_relation_info (void)
6928 FOR_EACH_BB_REVERSE (bb)
6931 rtx head = BB_HEAD (bb);
6933 /* We only need such notes at code labels. */
6934 if (GET_CODE (head) != CODE_LABEL)
6936 if (GET_CODE (NEXT_INSN (head)) == NOTE
6937 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6938 head = NEXT_INSN (head);
6940 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6941 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6943 rtx p = gen_rtx_REG (BImode, r);
6944 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6945 if (head == BB_END (bb))
6951 /* Look for conditional calls that do not return, and protect predicate
6952 relations around them. Otherwise the assembler will assume the call
6953 returns, and complain about uses of call-clobbered predicates after the call. */
6955 FOR_EACH_BB_REVERSE (bb)
6957 rtx insn = BB_HEAD (bb);
6961 if (GET_CODE (insn) == CALL_INSN
6962 && GET_CODE (PATTERN (insn)) == COND_EXEC
6963 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6965 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6966 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6967 if (BB_HEAD (bb) == insn)
6969 if (BB_END (bb) == insn)
6973 if (insn == BB_END (bb))
6975 insn = NEXT_INSN (insn);
6980 /* Perform machine dependent operations on the rtl chain INSNS. */
6985 /* We are freeing block_for_insn in the toplev to keep compatibility
6986 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6987 compute_bb_for_insn ();
6989 /* If optimizing, we'll have split before scheduling. */
6991 split_all_insns (0);
6993 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6994 non-optimizing bootstrap. */
6995 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6997 if (ia64_flag_schedule_insns2)
6999 timevar_push (TV_SCHED2);
7000 ia64_final_schedule = 1;
7002 initiate_bundle_states ();
7003 ia64_nop = make_insn_raw (gen_nop ());
7004 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7005 recog_memoized (ia64_nop);
7006 clocks_length = get_max_uid () + 1;
7007 stops_p = xcalloc (1, clocks_length);
7008 if (ia64_tune == PROCESSOR_ITANIUM)
7010 clocks = xcalloc (clocks_length, sizeof (int));
7011 add_cycles = xcalloc (clocks_length, sizeof (int));
7013 if (ia64_tune == PROCESSOR_ITANIUM2)
7015 pos_1 = get_cpu_unit_code ("2_1");
7016 pos_2 = get_cpu_unit_code ("2_2");
7017 pos_3 = get_cpu_unit_code ("2_3");
7018 pos_4 = get_cpu_unit_code ("2_4");
7019 pos_5 = get_cpu_unit_code ("2_5");
7020 pos_6 = get_cpu_unit_code ("2_6");
7021 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7022 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7023 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7024 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7025 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7026 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7027 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7028 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7029 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7030 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7031 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7032 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7033 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7034 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7035 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7036 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7037 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7038 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7039 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7040 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7044 pos_1 = get_cpu_unit_code ("1_1");
7045 pos_2 = get_cpu_unit_code ("1_2");
7046 pos_3 = get_cpu_unit_code ("1_3");
7047 pos_4 = get_cpu_unit_code ("1_4");
7048 pos_5 = get_cpu_unit_code ("1_5");
7049 pos_6 = get_cpu_unit_code ("1_6");
7050 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7051 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7052 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7053 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7054 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7055 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7056 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7057 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7058 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7059 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7060 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7061 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7062 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7063 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7064 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7065 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7066 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7067 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7068 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7069 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7071 schedule_ebbs (dump_file);
7072 finish_bundle_states ();
7073 if (ia64_tune == PROCESSOR_ITANIUM)
7079 emit_insn_group_barriers (dump_file);
7081 ia64_final_schedule = 0;
7082 timevar_pop (TV_SCHED2);
7085 emit_all_insn_group_barriers (dump_file);
7087 /* A call must not be the last instruction in a function, so that the
7088 return address remains within the function and unwinding works
7089 properly. Note that IA-64 differs from dwarf2 on this point. */
7090 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7095 insn = get_last_insn ();
7096 if (! INSN_P (insn))
7097 insn = prev_active_insn (insn);
7098 /* Skip over insns that expand to nothing. */
7099 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7101 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7102 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7104 insn = prev_active_insn (insn);
7106 if (GET_CODE (insn) == CALL_INSN)
7109 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7110 emit_insn (gen_break_f ());
7111 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7116 emit_predicate_relation_info ();
7118 if (ia64_flag_var_tracking)
7120 timevar_push (TV_VAR_TRACKING);
7121 variable_tracking_main ();
7122 timevar_pop (TV_VAR_TRACKING);
7126 /* Return true if REGNO is used by the epilogue. */
7129 ia64_epilogue_uses (int regno)
7134 /* With a call to a function in another module, we will write a new
7135 value to "gp". After returning from such a call, we need to make
7136 sure the function restores the original gp-value, even if the
7137 function itself does not use the gp anymore. */
7138 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7140 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7141 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7142 /* For functions defined with the syscall_linkage attribute, all
7143 input registers are marked as live at all function exits. This
7144 prevents the register allocator from using the input registers,
7145 which in turn makes it possible to restart a system call after
7146 an interrupt without having to save/restore the input registers.
7147 This also prevents kernel data from leaking to application code. */
7148 return lookup_attribute ("syscall_linkage",
7149 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7152 /* Conditional return patterns can't represent the use of `b0' as
7153 the return address, so we force the value live this way. */
7154 return 1;
7157 /* Likewise for ar.pfs, which is used by br.ret. */
7158 return 1;
7165 /* Return true if REGNO is used by the frame unwinder. */
7168 ia64_eh_uses (int regno)
7170 if (! reload_completed)
7171 return 0;
7173 if (current_frame_info.reg_save_b0
7174 && regno == current_frame_info.reg_save_b0)
7176 if (current_frame_info.reg_save_pr
7177 && regno == current_frame_info.reg_save_pr)
7179 if (current_frame_info.reg_save_ar_pfs
7180 && regno == current_frame_info.reg_save_ar_pfs)
7182 if (current_frame_info.reg_save_ar_unat
7183 && regno == current_frame_info.reg_save_ar_unat)
7185 if (current_frame_info.reg_save_ar_lc
7186 && regno == current_frame_info.reg_save_ar_lc)
7192 /* Return true if this goes in small data/bss. */
7194 /* ??? We could also support our own long data here. Generating movl/add/ld8
7195 instead of addl,ld8/ld8 makes the code bigger, but should make the
7196 code faster because there is one less load. This also includes incomplete
7197 types which can't go in sdata/sbss. */
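/* An illustrative sketch of the trade-off above (symbol and register
   numbers made up, relocation syntax approximate):

     // long-data access: movl/add/ld8, bigger code, one load
     movl r14 = @gprel(var)
     add  r14 = r14, gp
     ld8  r15 = [r14]

     // GOT-based access: addl,ld8/ld8, smaller code, two loads
     addl r14 = @ltoff(var), gp
     ld8  r14 = [r14]
     ld8  r15 = [r14]  */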
7200 ia64_in_small_data_p (tree exp)
7202 if (TARGET_NO_SDATA)
7203 return false;
7205 /* We want to merge strings, so we never consider them small data. */
7206 if (TREE_CODE (exp) == STRING_CST)
7207 return false;
7209 /* Functions are never small data. */
7210 if (TREE_CODE (exp) == FUNCTION_DECL)
7211 return false;
7213 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7215 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7216 if (strcmp (section, ".sdata") == 0
7217 || strcmp (section, ".sbss") == 0)
7218 return true;
7222 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7224 /* If this is an incomplete type with size 0, then we can't put it
7225 in sdata because it might be too big when completed. */
7226 if (size > 0 && size <= ia64_section_threshold)
7227 return true;
7233 /* Output assembly directives for prologue regions. */
7235 /* The current basic block number. */
7237 static bool last_block;
7239 /* True if we need a copy_state command at the start of the next block. */
7241 static bool need_copy_state;
7243 /* The function emits unwind directives for the start of an epilogue. */
7246 process_epilogue (void)
7248 /* If this isn't the last block of the function, then we need to label the
7249 current state, and copy it back in at the start of the next block. */
7253 fprintf (asm_out_file, "\t.label_state 1\n");
7254 need_copy_state = true;
7257 fprintf (asm_out_file, "\t.restore sp\n");
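/* Together with process_for_unwind_directive below, the emitted
   directives for a non-final epilogue look roughly like this
   (illustrative):

     .label_state 1
     .restore sp
       ... epilogue and return ...
     .body
     .copy_state 1      // at the start of the next block

   so the unwinder sees the pre-epilogue frame state again in the
   following block.  */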
7260 /* This function processes a SET pattern looking for specific patterns
7261 which result in emitting an assembly directive required for unwinding. */
7264 process_set (FILE *asm_out_file, rtx pat)
7266 rtx src = SET_SRC (pat);
7267 rtx dest = SET_DEST (pat);
7268 int src_regno, dest_regno;
7270 /* Look for the ALLOC insn. */
7271 if (GET_CODE (src) == UNSPEC_VOLATILE
7272 && XINT (src, 1) == UNSPECV_ALLOC
7273 && GET_CODE (dest) == REG)
7275 dest_regno = REGNO (dest);
7277 /* If this isn't the final destination for ar.pfs, the alloc
7278 shouldn't have been marked frame related. */
7279 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7282 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7283 ia64_dbx_register_number (dest_regno));
7287 /* Look for SP = .... */
7288 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7290 if (GET_CODE (src) == PLUS)
7292 rtx op0 = XEXP (src, 0);
7293 rtx op1 = XEXP (src, 1);
7294 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7296 if (INTVAL (op1) < 0)
7297 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7300 process_epilogue ();
7305 else if (GET_CODE (src) == REG
7306 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7307 process_epilogue ();
7314 /* Register move we need to look at. */
7315 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7317 src_regno = REGNO (src);
7318 dest_regno = REGNO (dest);
7323 /* Saving return address pointer. */
7324 if (dest_regno != current_frame_info.reg_save_b0)
7326 fprintf (asm_out_file, "\t.save rp, r%d\n",
7327 ia64_dbx_register_number (dest_regno));
7331 if (dest_regno != current_frame_info.reg_save_pr)
7333 fprintf (asm_out_file, "\t.save pr, r%d\n",
7334 ia64_dbx_register_number (dest_regno));
7337 case AR_UNAT_REGNUM:
7338 if (dest_regno != current_frame_info.reg_save_ar_unat)
7340 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7341 ia64_dbx_register_number (dest_regno));
7345 if (dest_regno != current_frame_info.reg_save_ar_lc)
7347 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7348 ia64_dbx_register_number (dest_regno));
7351 case STACK_POINTER_REGNUM:
7352 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7353 || ! frame_pointer_needed)
7355 fprintf (asm_out_file, "\t.vframe r%d\n",
7356 ia64_dbx_register_number (dest_regno));
7360 /* Everything else should indicate being stored to memory. */
7365 /* Memory store we need to look at. */
7366 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7372 if (GET_CODE (XEXP (dest, 0)) == REG)
7374 base = XEXP (dest, 0);
7377 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7378 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7380 base = XEXP (XEXP (dest, 0), 0);
7381 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7386 if (base == hard_frame_pointer_rtx)
7388 saveop = ".savepsp";
7391 else if (base == stack_pointer_rtx)
7396 src_regno = REGNO (src);
7400 if (current_frame_info.reg_save_b0 != 0)
7402 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7406 if (current_frame_info.reg_save_pr != 0)
7408 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7412 if (current_frame_info.reg_save_ar_lc != 0)
7414 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7418 if (current_frame_info.reg_save_ar_pfs != 0)
7420 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7423 case AR_UNAT_REGNUM:
7424 if (current_frame_info.reg_save_ar_unat != 0)
7426 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7433 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7434 1 << (src_regno - GR_REG (4)));
7442 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7443 1 << (src_regno - BR_REG (1)));
7450 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7451 1 << (src_regno - FR_REG (2)));
7454 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7455 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7456 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7457 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7458 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7459 1 << (src_regno - FR_REG (12)));
7471 /* This function looks at a single insn and emits any directives
7472 required to unwind this insn. */
7474 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7476 if (flag_unwind_tables
7477 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7481 if (GET_CODE (insn) == NOTE
7482 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7484 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7486 /* Restore unwind state from immediately before the epilogue. */
7487 if (need_copy_state)
7489 fprintf (asm_out_file, "\t.body\n");
7490 fprintf (asm_out_file, "\t.copy_state 1\n");
7491 need_copy_state = false;
7495 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7498 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7500 pat = XEXP (pat, 0);
7502 pat = PATTERN (insn);
7504 switch (GET_CODE (pat))
7507 process_set (asm_out_file, pat);
7513 int limit = XVECLEN (pat, 0);
7514 for (par_index = 0; par_index < limit; par_index++)
7516 rtx x = XVECEXP (pat, 0, par_index);
7517 if (GET_CODE (x) == SET)
7518 process_set (asm_out_file, x);
7531 ia64_init_builtins (void)
7533 tree psi_type_node = build_pointer_type (integer_type_node);
7534 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7536 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7537 tree si_ftype_psi_si_si
7538 = build_function_type_list (integer_type_node,
7539 psi_type_node, integer_type_node,
7540 integer_type_node, NULL_TREE);
7542 /* __sync_val_compare_and_swap_di */
7543 tree di_ftype_pdi_di_di
7544 = build_function_type_list (long_integer_type_node,
7545 pdi_type_node, long_integer_type_node,
7546 long_integer_type_node, NULL_TREE);
7547 /* __sync_bool_compare_and_swap_di */
7548 tree si_ftype_pdi_di_di
7549 = build_function_type_list (integer_type_node,
7550 pdi_type_node, long_integer_type_node,
7551 long_integer_type_node, NULL_TREE);
7552 /* __sync_synchronize */
7553 tree void_ftype_void
7554 = build_function_type (void_type_node, void_list_node);
7556 /* __sync_lock_test_and_set_si */
7557 tree si_ftype_psi_si
7558 = build_function_type_list (integer_type_node,
7559 psi_type_node, integer_type_node, NULL_TREE);
7561 /* __sync_lock_test_and_set_di */
7562 tree di_ftype_pdi_di
7563 = build_function_type_list (long_integer_type_node,
7564 pdi_type_node, long_integer_type_node,
7567 /* __sync_lock_release_si */
7569 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7571 /* __sync_lock_release_di */
7573 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7578 /* The __fpreg type. */
7579 fpreg_type = make_node (REAL_TYPE);
7580 /* ??? The back end should know to load/save __fpreg variables using
7581 the ldf.fill and stf.spill instructions. */
7582 TYPE_PRECISION (fpreg_type) = 80;
7583 layout_type (fpreg_type);
7584 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
7586 /* The __float80 type. */
7587 float80_type = make_node (REAL_TYPE);
7588 TYPE_PRECISION (float80_type) = 80;
7589 layout_type (float80_type);
7590 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
7592 /* The __float128 type. */
7595 tree float128_type = make_node (REAL_TYPE);
7596 TYPE_PRECISION (float128_type) = 128;
7597 layout_type (float128_type);
7598 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
7601 /* Under HPUX, this is a synonym for "long double". */
7602 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
7605 #define def_builtin(name, type, code) \
7606 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7609 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7610 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7611 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7612 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7613 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7614 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7615 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7616 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7618 def_builtin ("__sync_synchronize", void_ftype_void,
7619 IA64_BUILTIN_SYNCHRONIZE);
7621 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7622 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7623 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7624 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7625 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7626 IA64_BUILTIN_LOCK_RELEASE_SI);
7627 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7628 IA64_BUILTIN_LOCK_RELEASE_DI);
7630 def_builtin ("__builtin_ia64_bsp",
7631 build_function_type (ptr_type_node, void_list_node),
7634 def_builtin ("__builtin_ia64_flushrs",
7635 build_function_type (void_type_node, void_list_node),
7636 IA64_BUILTIN_FLUSHRS);
7638 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7639 IA64_BUILTIN_FETCH_AND_ADD_SI);
7640 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7641 IA64_BUILTIN_FETCH_AND_SUB_SI);
7642 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7643 IA64_BUILTIN_FETCH_AND_OR_SI);
7644 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7645 IA64_BUILTIN_FETCH_AND_AND_SI);
7646 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7647 IA64_BUILTIN_FETCH_AND_XOR_SI);
7648 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7649 IA64_BUILTIN_FETCH_AND_NAND_SI);
7651 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7652 IA64_BUILTIN_ADD_AND_FETCH_SI);
7653 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7654 IA64_BUILTIN_SUB_AND_FETCH_SI);
7655 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7656 IA64_BUILTIN_OR_AND_FETCH_SI);
7657 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7658 IA64_BUILTIN_AND_AND_FETCH_SI);
7659 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7660 IA64_BUILTIN_XOR_AND_FETCH_SI);
7661 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7662 IA64_BUILTIN_NAND_AND_FETCH_SI);
7664 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7665 IA64_BUILTIN_FETCH_AND_ADD_DI);
7666 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7667 IA64_BUILTIN_FETCH_AND_SUB_DI);
7668 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7669 IA64_BUILTIN_FETCH_AND_OR_DI);
7670 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7671 IA64_BUILTIN_FETCH_AND_AND_DI);
7672 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7673 IA64_BUILTIN_FETCH_AND_XOR_DI);
7674 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7675 IA64_BUILTIN_FETCH_AND_NAND_DI);
7677 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7678 IA64_BUILTIN_ADD_AND_FETCH_DI);
7679 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7680 IA64_BUILTIN_SUB_AND_FETCH_DI);
7681 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7682 IA64_BUILTIN_OR_AND_FETCH_DI);
7683 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7684 IA64_BUILTIN_AND_AND_FETCH_DI);
7685 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7686 IA64_BUILTIN_XOR_AND_FETCH_DI);
7687 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7688 IA64_BUILTIN_NAND_AND_FETCH_DI);
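/* A usage sketch (illustrative C, not part of this file): the builtins
   defined above follow the classic __sync interface, e.g.

     int lock;                // hypothetical variable
     int old = __sync_val_compare_and_swap_si (&lock, 0, 1);

   which ia64_expand_compare_and_swap below turns into the ar.ccv
   set-up, an mf, and a cmpxchg4.acq.  */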
7693 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7694 
7695      mf
7696      tmp = [ptr];
7697      do {
7698        ret = tmp;
7699        ar.ccv = tmp;
7700        tmp <op>= value;
7701        cmpxchgsz.acq tmp = [ptr], tmp
7702      } while (tmp != ret)
7703 */
7706 ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
7707 tree arglist, rtx target)
7709 rtx ret, label, tmp, ccv, insn, mem, value;
7712 arg0 = TREE_VALUE (arglist);
7713 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7714 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7715 #ifdef POINTERS_EXTEND_UNSIGNED
7716 if (GET_MODE(mem) != Pmode)
7717 mem = convert_memory_address (Pmode, mem);
7719 value = expand_expr (arg1, NULL_RTX, mode, 0);
7721 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7722 MEM_VOLATILE_P (mem) = 1;
7724 if (target && register_operand (target, mode))
7727 ret = gen_reg_rtx (mode);
7729 emit_insn (gen_mf ());
7731 /* Special case for fetchadd instructions. */
7732 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7735 insn = gen_fetchadd_acq_si (ret, mem, value);
7737 insn = gen_fetchadd_acq_di (ret, mem, value);
7742 tmp = gen_reg_rtx (mode);
7743 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7744 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7745 emit_move_insn (tmp, mem);
7747 label = gen_label_rtx ();
7749 emit_move_insn (ret, tmp);
7750 convert_move (ccv, tmp, /*unsignedp=*/1);
7752 /* Perform the specific operation. NAND is special-cased: it arrives
7753 as one_cmpl_optab and is expanded as a complement followed by AND. */
7754 if (binoptab == one_cmpl_optab)
7756 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7757 binoptab = and_optab;
7759 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7762 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7764 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7767 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7772 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7773 
7774      mf
7775      tmp = [ptr];
7776      do {
7777        old = tmp;
7778        ar.ccv = tmp;
7779        ret = tmp <op> value;
7780        cmpxchgsz.acq tmp = [ptr], ret
7781      } while (tmp != old)
7782 */
7785 ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
7786 tree arglist, rtx target)
7788 rtx old, label, tmp, ret, ccv, insn, mem, value;
7791 arg0 = TREE_VALUE (arglist);
7792 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7793 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7794 #ifdef POINTERS_EXTEND_UNSIGNED
7795 if (GET_MODE(mem) != Pmode)
7796 mem = convert_memory_address (Pmode, mem);
7799 value = expand_expr (arg1, NULL_RTX, mode, 0);
7801 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7802 MEM_VOLATILE_P (mem) = 1;
7804 if (target && ! register_operand (target, mode))
7807 emit_insn (gen_mf ());
7808 tmp = gen_reg_rtx (mode);
7809 old = gen_reg_rtx (mode);
7810 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7811 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7813 emit_move_insn (tmp, mem);
7815 label = gen_label_rtx ();
7817 emit_move_insn (old, tmp);
7818 convert_move (ccv, tmp, /*unsignedp=*/1);
7820 /* Perform the specific operation. NAND is special-cased: it arrives
7821 as one_cmpl_optab and is expanded as a complement followed by AND. */
7822 if (binoptab == one_cmpl_optab)
7824 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7825 binoptab = and_optab;
7827 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7830 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7832 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7835 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7840 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7841 
7842      ar.ccv = oldval
7843      mf
7844      cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7845      return ret
7846 
7847    For bool_ it's the same except return ret == oldval.
7848 */
7851 ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
7852 int boolp, tree arglist, rtx target)
7854 tree arg0, arg1, arg2;
7855 rtx mem, old, new, ccv, tmp, insn;
7857 arg0 = TREE_VALUE (arglist);
7858 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7859 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7860 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7861 old = expand_expr (arg1, NULL_RTX, mode, 0);
7862 new = expand_expr (arg2, NULL_RTX, mode, 0);
7864 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7865 MEM_VOLATILE_P (mem) = 1;
7867 if (GET_MODE (old) != mode)
7868 old = convert_to_mode (mode, old, /*unsignedp=*/1);
7869 if (GET_MODE (new) != mode)
7870 new = convert_to_mode (mode, new, /*unsignedp=*/1);
7872 if (! register_operand (old, mode))
7873 old = copy_to_mode_reg (mode, old);
7874 if (! register_operand (new, mode))
7875 new = copy_to_mode_reg (mode, new);
7877 if (! boolp && target && register_operand (target, mode))
7880 tmp = gen_reg_rtx (mode);
7882 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7883 convert_move (ccv, old, /*unsignedp=*/1);
7884 emit_insn (gen_mf ());
7886 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7888 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7894 target = gen_reg_rtx (rmode);
7895 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7901 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7904 ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
7908 rtx mem, new, ret, insn;
7910 arg0 = TREE_VALUE (arglist);
7911 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7912 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7913 new = expand_expr (arg1, NULL_RTX, mode, 0);
7915 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7916 MEM_VOLATILE_P (mem) = 1;
7917 if (! register_operand (new, mode))
7918 new = copy_to_mode_reg (mode, new);
7920 if (target && register_operand (target, mode))
7923 ret = gen_reg_rtx (mode);
7926 insn = gen_xchgsi (ret, mem, new);
7928 insn = gen_xchgdi (ret, mem, new);
7934 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7937 ia64_expand_lock_release (enum machine_mode mode, tree arglist,
7938 rtx target ATTRIBUTE_UNUSED)
7943 arg0 = TREE_VALUE (arglist);
7944 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7946 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7947 MEM_VOLATILE_P (mem) = 1;
7949 emit_move_insn (mem, const0_rtx);
7955 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
7956 enum machine_mode mode ATTRIBUTE_UNUSED,
7957 int ignore ATTRIBUTE_UNUSED)
7959 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7960 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7961 tree arglist = TREE_OPERAND (exp, 1);
7962 enum machine_mode rmode = VOIDmode;
7966 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7967 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7972 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7973 case IA64_BUILTIN_LOCK_RELEASE_SI:
7974 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7975 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7976 case IA64_BUILTIN_FETCH_AND_OR_SI:
7977 case IA64_BUILTIN_FETCH_AND_AND_SI:
7978 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7979 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7980 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7981 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7982 case IA64_BUILTIN_OR_AND_FETCH_SI:
7983 case IA64_BUILTIN_AND_AND_FETCH_SI:
7984 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7985 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7989 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7994 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7999 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8000 case IA64_BUILTIN_LOCK_RELEASE_DI:
8001 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8002 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8003 case IA64_BUILTIN_FETCH_AND_OR_DI:
8004 case IA64_BUILTIN_FETCH_AND_AND_DI:
8005 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8006 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8007 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8008 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8009 case IA64_BUILTIN_OR_AND_FETCH_DI:
8010 case IA64_BUILTIN_AND_AND_FETCH_DI:
8011 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8012 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8022 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8023 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8024 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8027 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8028 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8029 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8032 case IA64_BUILTIN_SYNCHRONIZE:
8033 emit_insn (gen_mf ());
8036 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8037 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8038 return ia64_expand_lock_test_and_set (mode, arglist, target);
8040 case IA64_BUILTIN_LOCK_RELEASE_SI:
8041 case IA64_BUILTIN_LOCK_RELEASE_DI:
8042 return ia64_expand_lock_release (mode, arglist, target);
8044 case IA64_BUILTIN_BSP:
8045 if (! target || ! register_operand (target, DImode))
8046 target = gen_reg_rtx (DImode);
8047 emit_insn (gen_bsp_value (target));
8048 #ifdef POINTERS_EXTEND_UNSIGNED
8049 target = convert_memory_address (ptr_mode, target);
8053 case IA64_BUILTIN_FLUSHRS:
8054 emit_insn (gen_flushrs ());
8057 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8058 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8059 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8061 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8062 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8063 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8065 case IA64_BUILTIN_FETCH_AND_OR_SI:
8066 case IA64_BUILTIN_FETCH_AND_OR_DI:
8067 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8069 case IA64_BUILTIN_FETCH_AND_AND_SI:
8070 case IA64_BUILTIN_FETCH_AND_AND_DI:
8071 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8073 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8074 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8075 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8077 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8078 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8079 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8081 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8082 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8083 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8085 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8086 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8087 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8089 case IA64_BUILTIN_OR_AND_FETCH_SI:
8090 case IA64_BUILTIN_OR_AND_FETCH_DI:
8091 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8093 case IA64_BUILTIN_AND_AND_FETCH_SI:
8094 case IA64_BUILTIN_AND_AND_FETCH_DI:
8095 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8097 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8098 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8099 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8101 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8102 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8103 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8112 /* On HP-UX IA64, aggregate parameters are passed in the
8113 most significant bits of the stack slot. */
8116 ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8118 /* Exception to normal case for structures/unions/etc. */
8120 if (type && AGGREGATE_TYPE_P (type)
8121 && int_size_in_bytes (type) < UNITS_PER_WORD)
8122 return upward;
8124 /* Fall back to the default. */
8125 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
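/* An illustrative reading (not from the original sources): HP-UX runs
   IA-64 big-endian, so returning `upward' (data at the lower
   addresses, padding above) is what places a small aggregate in the
   most significant bytes of its slot, as the comment above says.  */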
8128 /* Linked list of all external functions that are to be emitted by GCC.
8129 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8130 order to avoid putting out names that are never really used. */
8132 struct extern_func_list GTY(())
8134 struct extern_func_list *next;
8138 static GTY(()) struct extern_func_list *extern_func_head;
8141 ia64_hpux_add_extern_decl (tree decl)
8143 struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8146 p->next = extern_func_head;
8147 extern_func_head = p;
8150 /* Print out the list of used global functions. */
8153 ia64_hpux_file_end (void)
8155 struct extern_func_list *p;
8157 for (p = extern_func_head; p; p = p->next)
8159 tree decl = p->decl;
8160 tree id = DECL_ASSEMBLER_NAME (decl);
8165 if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8167 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8169 TREE_ASM_WRITTEN (decl) = 1;
8170 (*targetm.asm_out.globalize_label) (asm_out_file, name);
8171 fputs (TYPE_ASM_OP, asm_out_file);
8172 assemble_name (asm_out_file, name);
8173 fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8177 extern_func_head = 0;
8180 /* Set the SImode div/mod functions -- init_integral_libfuncs only
8181 initializes modes of word_mode and larger. Rename the TFmode libfuncs
8182 using the HPUX conventions. __divtf3 is used for XFmode; we need to
8183 keep it for backward compatibility. */
8186 ia64_init_libfuncs (void)
8188 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8189 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8190 set_optab_libfunc (smod_optab, SImode, "__modsi3");
8191 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8193 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8194 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8195 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8196 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8197 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8199 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8200 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8201 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8202 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8203 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8204 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8206 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8207 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8208 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8209 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8211 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8212 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8215 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8218 ia64_hpux_init_libfuncs (void)
8220 ia64_init_libfuncs ();
8222 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8223 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8224 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8226 /* ia64_expand_compare uses this. */
8227 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8229 /* These should never be used. */
8230 set_optab_libfunc (eq_optab, TFmode, 0);
8231 set_optab_libfunc (ne_optab, TFmode, 0);
8232 set_optab_libfunc (gt_optab, TFmode, 0);
8233 set_optab_libfunc (ge_optab, TFmode, 0);
8234 set_optab_libfunc (lt_optab, TFmode, 0);
8235 set_optab_libfunc (le_optab, TFmode, 0);
8238 /* Rename the division and modulus functions in VMS. */
8241 ia64_vms_init_libfuncs (void)
8243 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8244 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8245 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8246 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8247 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8248 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8249 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8250 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8253 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8254 the HPUX conventions. */
8257 ia64_sysv4_init_libfuncs (void)
8259 ia64_init_libfuncs ();
8261 /* These functions are not part of the HPUX TFmode interface. We
8262 use them instead of _U_Qfcmp, which doesn't work the way we expect. */
8264 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8265 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8266 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8267 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8268 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8269 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8271 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8272 glibc doesn't have them. */
8275 /* Switch to the section to which we should output X. The only thing
8276 special we do here is to honor small data. */
8279 ia64_select_rtx_section (enum machine_mode mode, rtx x,
8280 unsigned HOST_WIDE_INT align)
8282 if (GET_MODE_SIZE (mode) > 0
8283 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8286 default_elf_select_rtx_section (mode, x, align);
8289 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8290 Pretend flag_pic is always set. */
8293 ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8295 default_elf_select_section_1 (exp, reloc, align, true);
8299 ia64_rwreloc_unique_section (tree decl, int reloc)
8301 default_unique_section_1 (decl, reloc, true);
8305 ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8306 unsigned HOST_WIDE_INT align)
8308 int save_pic = flag_pic;
8310 ia64_select_rtx_section (mode, x, align);
8311 flag_pic = save_pic;
8315 ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8317 return default_section_type_flags_1 (decl, name, reloc, true);
8320 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8321 structure type and that the address of that type should be passed
8322 in out0, rather than in r8. */
8325 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8327 tree ret_type = TREE_TYPE (fntype);
8329 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8330 as the structure return address parameter, if the return value
8331 type has a non-trivial copy constructor or destructor. It is not
8332 clear if this same convention should be used for other
8333 programming languages. Until G++ 3.4, we incorrectly used r8 for
8334 these return values. */
8335 return (abi_version_at_least (2)
8337 && TYPE_MODE (ret_type) == BLKmode
8338 && TREE_ADDRESSABLE (ret_type)
8339 && strcmp (lang_hooks.name, "GNU C++") == 0);
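/* An illustrative example (hypothetical C++): for

     struct S { S (const S &); ~S (); int i; };
     S f ();

   the return type has a non-trivial copy constructor, so under the
   Itanium C++ ABI the caller passes the return-slot address in out0
   rather than r8, and this predicate returns true.  */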

/* Output the assembler code for a thunk function.  THUNK is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      /* Widen the incoming 32-bit pointer to 64 bits, folding DELTA
         into the extension when it fits in the immediate field.  */
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }
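
  /* In LP64 mode the incoming pointer is already 64 bits wide and the
     block above is skipped; either way THIS now holds a full Pmode
     pointer, with DELTA still pending unless it was folded in.  */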

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          /* Load the 32-bit vtable pointer and widen it, folding
             VCALL_OFFSET into the extension when it fits.  */
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      /* Fetch the adjustment word from the vtable slot.  */
      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }
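
  /* Taken together, the insns emitted above have now computed

         this += delta;
         this += *(*this + vcall_offset);

     matching the adjustment described in the function comment, using
     r2 (and a second scratch register for an out-of-range
     VCALL_OFFSET) as temporaries.  */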

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_all_insn_group_barriers (NULL);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
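
/* Returning NULL_RTX above tells the middle end to pass the aggregate
   return address as an ordinary (invisible) first argument, which is
   what places it in out0; otherwise the r8 convention applies via the
   register returned above.  */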

#include "gt-ia64.h"