1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "sched-int.h"
50 #include "target-def.h"
53 #include "langhooks.h"
54 #include "cfglayout.h"
61 #include "tm-constrs.h"
62 #include "sel-sched.h"
64 /* This is used for communication between ASM_OUTPUT_LABEL and
65 ASM_OUTPUT_LABELREF. */
66 int ia64_asm_output_label = 0;
68 /* Register names for ia64_expand_prologue. */
69 static const char * const ia64_reg_numbers[96] =
70 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79 "r104","r105","r106","r107","r108","r109","r110","r111",
80 "r112","r113","r114","r115","r116","r117","r118","r119",
81 "r120","r121","r122","r123","r124","r125","r126","r127"};
83 /* ??? These strings could be shared with REGISTER_NAMES. */
84 static const char * const ia64_input_reg_names[8] =
85 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
87 /* ??? These strings could be shared with REGISTER_NAMES. */
88 static const char * const ia64_local_reg_names[80] =
89 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
100 /* ??? These strings could be shared with REGISTER_NAMES. */
101 static const char * const ia64_output_reg_names[8] =
102 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
104 /* Which cpu are we scheduling for. */
105 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
107 /* Determines whether we run our final scheduling pass or not. We always
108 avoid the normal second scheduling pass. */
109 static int ia64_flag_schedule_insns2;
111 /* Determines whether we run variable tracking in machine dependent reorganization.  */
113 static int ia64_flag_var_tracking;
115 /* Variables which are this size or smaller are put in the sdata/sbss sections.  */
118 unsigned int ia64_section_threshold;
120 /* The following variable is used by the DFA insn scheduler. The value is
121 TRUE if we do insn bundling instead of insn scheduling. */
133 number_of_ia64_frame_regs
136 /* Structure to be filled in by ia64_compute_frame_size with register
137 save masks and offsets for the current function. */
139 struct ia64_frame_info
141 HOST_WIDE_INT total_size; /* size of the stack frame, not including
142 the caller's scratch area. */
143 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
144 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
145 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
146 HARD_REG_SET mask; /* mask of saved registers. */
147 unsigned int gr_used_mask; /* mask of registers in use as gr spill
148 registers or long-term scratches. */
149 int n_spilled; /* number of spilled registers. */
150 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
151 int n_input_regs; /* number of input registers used. */
152 int n_local_regs; /* number of local registers used. */
153 int n_output_regs; /* number of output registers used. */
154 int n_rotate_regs; /* number of rotating registers used. */
156 char need_regstk; /* true if a .regstk directive needed. */
157 char initialized; /* true if the data is finalized. */
160 /* Current frame information calculated by ia64_compute_frame_size. */
161 static struct ia64_frame_info current_frame_info;
162 /* The actual registers that are emitted. */
163 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
165 static int ia64_first_cycle_multipass_dfa_lookahead (void);
166 static void ia64_dependencies_evaluation_hook (rtx, rtx);
167 static void ia64_init_dfa_pre_cycle_insn (void);
168 static rtx ia64_dfa_pre_cycle_insn (void);
169 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
170 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
171 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
172 static void ia64_h_i_d_extended (void);
173 static void * ia64_alloc_sched_context (void);
174 static void ia64_init_sched_context (void *, bool);
175 static void ia64_set_sched_context (void *);
176 static void ia64_clear_sched_context (void *);
177 static void ia64_free_sched_context (void *);
178 static int ia64_mode_to_int (enum machine_mode);
179 static void ia64_set_sched_flags (spec_info_t);
180 static ds_t ia64_get_insn_spec_ds (rtx);
181 static ds_t ia64_get_insn_checked_ds (rtx);
182 static bool ia64_skip_rtx_p (const_rtx);
183 static int ia64_speculate_insn (rtx, ds_t, rtx *);
184 static bool ia64_needs_block_p (int);
185 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
186 static int ia64_spec_check_p (rtx);
187 static int ia64_spec_check_src_p (rtx);
188 static rtx gen_tls_get_addr (void);
189 static rtx gen_thread_pointer (void);
190 static int find_gr_spill (enum ia64_frame_regs, int);
191 static int next_scratch_gr_reg (void);
192 static void mark_reg_gr_used_mask (rtx, void *);
193 static void ia64_compute_frame_size (HOST_WIDE_INT);
194 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
195 static void finish_spill_pointers (void);
196 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
197 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
198 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
199 static rtx gen_movdi_x (rtx, rtx, rtx);
200 static rtx gen_fr_spill_x (rtx, rtx, rtx);
201 static rtx gen_fr_restore_x (rtx, rtx, rtx);
203 static bool ia64_can_eliminate (const int, const int);
204 static enum machine_mode hfa_element_mode (const_tree, bool);
205 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
207 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
209 static bool ia64_function_ok_for_sibcall (tree, tree);
210 static bool ia64_return_in_memory (const_tree, const_tree);
211 static bool ia64_rtx_costs (rtx, int, int, int *, bool);
212 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
213 static void fix_range (const char *);
214 static bool ia64_handle_option (size_t, const char *, int);
215 static struct machine_function * ia64_init_machine_status (void);
216 static void emit_insn_group_barriers (FILE *);
217 static void emit_all_insn_group_barriers (FILE *);
218 static void final_emit_insn_group_barriers (FILE *);
219 static void emit_predicate_relation_info (void);
220 static void ia64_reorg (void);
221 static bool ia64_in_small_data_p (const_tree);
222 static void process_epilogue (FILE *, rtx, bool, bool);
223 static int process_set (FILE *, rtx, rtx, bool, bool);
225 static bool ia64_assemble_integer (rtx, unsigned int, int);
226 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
227 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void ia64_output_function_end_prologue (FILE *);
230 static int ia64_issue_rate (void);
231 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
232 static void ia64_sched_init (FILE *, int, int);
233 static void ia64_sched_init_global (FILE *, int, int);
234 static void ia64_sched_finish_global (FILE *, int);
235 static void ia64_sched_finish (FILE *, int);
236 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
237 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
238 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
239 static int ia64_variable_issue (FILE *, int, rtx, int);
241 static struct bundle_state *get_free_bundle_state (void);
242 static void free_bundle_state (struct bundle_state *);
243 static void initiate_bundle_states (void);
244 static void finish_bundle_states (void);
245 static unsigned bundle_state_hash (const void *);
246 static int bundle_state_eq_p (const void *, const void *);
247 static int insert_bundle_state (struct bundle_state *);
248 static void initiate_bundle_state_table (void);
249 static void finish_bundle_state_table (void);
250 static int try_issue_nops (struct bundle_state *, int);
251 static int try_issue_insn (struct bundle_state *, rtx);
252 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
253 static int get_max_pos (state_t);
254 static int get_template (state_t, int);
256 static rtx get_next_important_insn (rtx, rtx);
257 static bool important_for_bundling_p (rtx);
258 static void bundling (FILE *, int, rtx, rtx);
260 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
261 HOST_WIDE_INT, tree);
262 static void ia64_file_start (void);
263 static void ia64_globalize_decl_name (FILE *, tree);
265 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
266 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
267 static section *ia64_select_rtx_section (enum machine_mode, rtx,
268 unsigned HOST_WIDE_INT);
269 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
271 static unsigned int ia64_section_type_flags (tree, const char *, int);
272 static void ia64_init_libfuncs (void)
274 static void ia64_hpux_init_libfuncs (void)
276 static void ia64_sysv4_init_libfuncs (void)
278 static void ia64_vms_init_libfuncs (void)
280 static void ia64_soft_fp_init_libfuncs (void)
282 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
284 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
287 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
288 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
289 static void ia64_encode_section_info (tree, rtx, int);
290 static rtx ia64_struct_value_rtx (tree, int);
291 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
292 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
293 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
294 static bool ia64_cannot_force_const_mem (rtx);
295 static const char *ia64_mangle_type (const_tree);
296 static const char *ia64_invalid_conversion (const_tree, const_tree);
297 static const char *ia64_invalid_unary_op (int, const_tree);
298 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
299 static enum machine_mode ia64_c_mode_for_suffix (char);
300 static enum machine_mode ia64_promote_function_mode (const_tree,
305 static void ia64_trampoline_init (rtx, tree, rtx);
306 static void ia64_override_options_after_change (void);
308 /* Table of valid machine attributes. */
309 static const struct attribute_spec ia64_attribute_table[] =
311 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
312 { "syscall_linkage", 0, 0, false, true, true, NULL },
313 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
314 #if TARGET_ABI_OPEN_VMS
315 { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute},
317 { "version_id", 1, 1, true, false, false,
318 ia64_handle_version_id_attribute },
319 { NULL, 0, 0, false, false, false, NULL }
322 /* Initialize the GCC target structure. */
323 #undef TARGET_ATTRIBUTE_TABLE
324 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
326 #undef TARGET_INIT_BUILTINS
327 #define TARGET_INIT_BUILTINS ia64_init_builtins
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
332 #undef TARGET_ASM_BYTE_OP
333 #define TARGET_ASM_BYTE_OP "\tdata1\t"
334 #undef TARGET_ASM_ALIGNED_HI_OP
335 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
338 #undef TARGET_ASM_ALIGNED_DI_OP
339 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
340 #undef TARGET_ASM_UNALIGNED_HI_OP
341 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
342 #undef TARGET_ASM_UNALIGNED_SI_OP
343 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
344 #undef TARGET_ASM_UNALIGNED_DI_OP
345 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
346 #undef TARGET_ASM_INTEGER
347 #define TARGET_ASM_INTEGER ia64_assemble_integer
349 #undef TARGET_ASM_FUNCTION_PROLOGUE
350 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
351 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
352 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
353 #undef TARGET_ASM_FUNCTION_EPILOGUE
354 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
356 #undef TARGET_IN_SMALL_DATA_P
357 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
359 #undef TARGET_SCHED_ADJUST_COST_2
360 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
365 #undef TARGET_SCHED_INIT
366 #define TARGET_SCHED_INIT ia64_sched_init
367 #undef TARGET_SCHED_FINISH
368 #define TARGET_SCHED_FINISH ia64_sched_finish
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
371 #undef TARGET_SCHED_FINISH_GLOBAL
372 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER ia64_sched_reorder
375 #undef TARGET_SCHED_REORDER2
376 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
378 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
379 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
384 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
385 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
386 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
387 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
389 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
390 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
391 ia64_first_cycle_multipass_dfa_lookahead_guard
393 #undef TARGET_SCHED_DFA_NEW_CYCLE
394 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
396 #undef TARGET_SCHED_H_I_D_EXTENDED
397 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
399 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
400 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
402 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
403 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
405 #undef TARGET_SCHED_SET_SCHED_CONTEXT
406 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
408 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
409 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
411 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
412 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
414 #undef TARGET_SCHED_SET_SCHED_FLAGS
415 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
417 #undef TARGET_SCHED_GET_INSN_SPEC_DS
418 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
420 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
421 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
423 #undef TARGET_SCHED_SPECULATE_INSN
424 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
426 #undef TARGET_SCHED_NEEDS_BLOCK_P
427 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
429 #undef TARGET_SCHED_GEN_SPEC_CHECK
430 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
432 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
433 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
434 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
436 #undef TARGET_SCHED_SKIP_RTX_P
437 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
439 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
440 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
444 #undef TARGET_ASM_OUTPUT_MI_THUNK
445 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
446 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
447 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
449 #undef TARGET_ASM_FILE_START
450 #define TARGET_ASM_FILE_START ia64_file_start
452 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
453 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
455 #undef TARGET_RTX_COSTS
456 #define TARGET_RTX_COSTS ia64_rtx_costs
457 #undef TARGET_ADDRESS_COST
458 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
460 #undef TARGET_UNSPEC_MAY_TRAP_P
461 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
469 #undef TARGET_SECTION_TYPE_FLAGS
470 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
473 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
474 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
477 #undef TARGET_PROMOTE_FUNCTION_MODE
478 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
480 /* ??? Investigate. */
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
486 #undef TARGET_STRUCT_VALUE_RTX
487 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
488 #undef TARGET_RETURN_IN_MEMORY
489 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
490 #undef TARGET_SETUP_INCOMING_VARARGS
491 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
492 #undef TARGET_STRICT_ARGUMENT_NAMING
493 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
497 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
498 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
500 #undef TARGET_UNWIND_EMIT
501 #define TARGET_UNWIND_EMIT process_for_unwind_directive
503 #undef TARGET_SCALAR_MODE_SUPPORTED_P
504 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
508 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
509 in an order different from the specified program order. */
510 #undef TARGET_RELAXED_ORDERING
511 #define TARGET_RELAXED_ORDERING true
513 #undef TARGET_DEFAULT_TARGET_FLAGS
514 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
515 #undef TARGET_HANDLE_OPTION
516 #define TARGET_HANDLE_OPTION ia64_handle_option
518 #undef TARGET_CANNOT_FORCE_CONST_MEM
519 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
521 #undef TARGET_MANGLE_TYPE
522 #define TARGET_MANGLE_TYPE ia64_mangle_type
524 #undef TARGET_INVALID_CONVERSION
525 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
526 #undef TARGET_INVALID_UNARY_OP
527 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
528 #undef TARGET_INVALID_BINARY_OP
529 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
531 #undef TARGET_C_MODE_FOR_SUFFIX
532 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
534 #undef TARGET_CAN_ELIMINATE
535 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
537 #undef TARGET_TRAMPOLINE_INIT
538 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
540 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
541 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
543 struct gcc_target targetm = TARGET_INITIALIZER;
547 ADDR_AREA_NORMAL, /* normal address area */
548 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
552 static GTY(()) tree small_ident1;
553 static GTY(()) tree small_ident2;
558 if (small_ident1 == 0)
560 small_ident1 = get_identifier ("small");
561 small_ident2 = get_identifier ("__small__");
565 /* Retrieve the address area that has been chosen for the given decl. */
567 static ia64_addr_area
568 ia64_get_addr_area (tree decl)
572 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
578 id = TREE_VALUE (TREE_VALUE (model_attr));
579 if (id == small_ident1 || id == small_ident2)
580 return ADDR_AREA_SMALL;
582 return ADDR_AREA_NORMAL;
586 ia64_handle_model_attribute (tree *node, tree name, tree args,
587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
589 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
591 tree arg, decl = *node;
594 arg = TREE_VALUE (args);
595 if (arg == small_ident1 || arg == small_ident2)
597 addr_area = ADDR_AREA_SMALL;
601 warning (OPT_Wattributes, "invalid argument of %qE attribute",
603 *no_add_attrs = true;
606 switch (TREE_CODE (decl))
609 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
611 && !TREE_STATIC (decl))
613 error_at (DECL_SOURCE_LOCATION (decl),
614 "an address area attribute cannot be specified for "
616 *no_add_attrs = true;
618 area = ia64_get_addr_area (decl);
619 if (area != ADDR_AREA_NORMAL && addr_area != area)
621 error ("address area of %q+D conflicts with previous "
622 "declaration", decl);
623 *no_add_attrs = true;
628 error_at (DECL_SOURCE_LOCATION (decl),
629 "address area attribute cannot be specified for "
631 *no_add_attrs = true;
635 warning (OPT_Wattributes, "%qE attribute ignored",
637 *no_add_attrs = true;
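/* For reference, the attribute handled above is the user-visible knob for
   these address areas.  An illustrative (hypothetical) use in user code:

     static int counter __attribute__ ((model ("small")));

   places COUNTER in the small address area described above, so its address
   fits the "addl" range and does not need a full 64-bit move to form.  */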
644 /* The section must have global and overlaid attributes. */
645 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
647 /* Part of the low level implementation of DEC Ada pragma Common_Object which
648 enables the shared use of variables stored in overlaid linker areas
649 corresponding to the use of Fortran COMMON. */
652 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
653 int flags ATTRIBUTE_UNUSED,
661 DECL_COMMON (decl) = 1;
662 id = TREE_VALUE (args);
663 if (TREE_CODE (id) == IDENTIFIER_NODE)
664 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
665 else if (TREE_CODE (id) == STRING_CST)
669 warning (OPT_Wattributes,
670 "%qE attribute requires a string constant argument", name);
671 *no_add_attrs = true;
674 DECL_SECTION_NAME (decl) = val;
678 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
681 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
682 unsigned HOST_WIDE_INT size,
685 tree attr = DECL_ATTRIBUTES (decl);
687 /* Since the common_object attribute sets DECL_SECTION_NAME, check that
688 before looking up the attribute. */
689 if (DECL_SECTION_NAME (decl) && attr)
690 attr = lookup_attribute ("common_object", attr);
696 /* Code from elfos.h. */
697 fprintf (file, "%s", COMMON_ASM_OP);
698 assemble_name (file, name);
699 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
700 size, align / BITS_PER_UNIT);
704 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
705 ASM_OUTPUT_LABEL (file, name);
706 ASM_OUTPUT_SKIP (file, size ? size : 1);
710 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
713 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
716 if (!(flags & SECTION_VMS_OVERLAY))
718 default_elf_asm_named_section (name, flags, decl);
721 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
724 if (flags & SECTION_DECLARED)
726 fprintf (asm_out_file, "\t.section\t%s\n", name);
730 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
734 ia64_encode_addr_area (tree decl, rtx symbol)
738 flags = SYMBOL_REF_FLAGS (symbol);
739 switch (ia64_get_addr_area (decl))
741 case ADDR_AREA_NORMAL: break;
742 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
743 default: gcc_unreachable ();
745 SYMBOL_REF_FLAGS (symbol) = flags;
749 ia64_encode_section_info (tree decl, rtx rtl, int first)
751 default_encode_section_info (decl, rtl, first);
753 /* Careful not to prod global register variables. */
754 if (TREE_CODE (decl) == VAR_DECL
755 && GET_CODE (DECL_RTL (decl)) == MEM
756 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
757 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
758 ia64_encode_addr_area (decl, XEXP (rtl, 0));
761 /* Return 1 if the operands of a move are ok. */
764 ia64_move_ok (rtx dst, rtx src)
766 /* If we're under init_recog_no_volatile, we'll not be able to use
767 memory_operand. So check the code directly and don't worry about
768 the validity of the underlying address, which should have been
769 checked elsewhere anyway. */
770 if (GET_CODE (dst) != MEM)
772 if (GET_CODE (src) == MEM)
774 if (register_operand (src, VOIDmode))
777 /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */
778 if (INTEGRAL_MODE_P (GET_MODE (dst)))
779 return src == const0_rtx;
781 return satisfies_constraint_G (src);
784 /* Return 1 if the operands are ok for a floating point load pair. */
787 ia64_load_pair_ok (rtx dst, rtx src)
789 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
791 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
793 switch (GET_CODE (XEXP (src, 0)))
802 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
804 if (GET_CODE (adjust) != CONST_INT
805 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
816 addp4_optimize_ok (rtx op1, rtx op2)
818 return (basereg_operand (op1, GET_MODE(op1)) !=
819 basereg_operand (op2, GET_MODE(op2)));
822 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
823 Return the length of the field, or <= 0 on failure. */
826 ia64_depz_field_mask (rtx rop, rtx rshift)
828 unsigned HOST_WIDE_INT op = INTVAL (rop);
829 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
831 /* Get rid of the zero bits we're shifting in. */
834 /* We must now have a solid block of 1's at bit 0. */
835 return exact_log2 (op + 1);
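/* A scalar sketch of the check above (illustrative only; the helper name is
   made up).  For example, mask 0x3f00 with shift 8 yields 0x3f after the
   shift, and 0x3f + 1 is a power of two, so the field length is 6, i.e. a
   6-bit field deposited at position 8:

     #include <stdint.h>

     static int depz_field_len (uint64_t mask, unsigned shift)
     {
       uint64_t m = mask >> shift;    // drop the zero bits shifted in
       // a solid block of 1s at bit 0 means m + 1 is a power of two
       return (m != 0 && (m & (m + 1)) == 0) ? __builtin_popcountll (m) : -1;
     }
*/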
838 /* Return the TLS model to use for ADDR. */
840 static enum tls_model
841 tls_symbolic_operand_type (rtx addr)
843 enum tls_model tls_kind = TLS_MODEL_NONE;
845 if (GET_CODE (addr) == CONST)
847 if (GET_CODE (XEXP (addr, 0)) == PLUS
848 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
849 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
851 else if (GET_CODE (addr) == SYMBOL_REF)
852 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
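/* The model recorded in SYMBOL_REF_TLS_MODEL ultimately comes from the
   user's source and options.  Illustrative examples (not from this file):

     __thread int tls_counter;          // usually global-dynamic under -fPIC
     static __thread int tls_private;   // local-dynamic under -fPIC,
                                        // local-exec when not compiling PIC

   and -ftls-model=... or __attribute__ ((tls_model ("initial-exec"))) can
   override the default.  */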
857 /* Return true if X is a constant that is valid for some immediate
858 field in an instruction. */
861 ia64_legitimate_constant_p (rtx x)
863 switch (GET_CODE (x))
870 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
871 || GET_MODE (x) == DFmode)
873 return satisfies_constraint_G (x);
877 /* ??? Short term workaround for PR 28490. We must make the code here
878 match the code in ia64_expand_move and move_operand, even though they
879 are both technically wrong. */
880 if (tls_symbolic_operand_type (x) == 0)
882 HOST_WIDE_INT addend = 0;
885 if (GET_CODE (op) == CONST
886 && GET_CODE (XEXP (op, 0)) == PLUS
887 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
889 addend = INTVAL (XEXP (XEXP (op, 0), 1));
890 op = XEXP (XEXP (op, 0), 0);
893 if (any_offset_symbol_operand (op, GET_MODE (op))
894 || function_operand (op, GET_MODE (op)))
896 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
897 return (addend & 0x3fff) == 0;
904 enum machine_mode mode = GET_MODE (x);
906 if (mode == V2SFmode)
907 return satisfies_constraint_Y (x);
909 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
910 && GET_MODE_SIZE (mode) <= 8);
918 /* Don't allow TLS addresses to get spilled to memory. */
921 ia64_cannot_force_const_mem (rtx x)
923 if (GET_MODE (x) == RFmode)
925 return tls_symbolic_operand_type (x) != 0;
928 /* Expand a symbolic constant load. */
931 ia64_expand_load_address (rtx dest, rtx src)
933 gcc_assert (GET_CODE (dest) == REG);
935 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
936 having to pointer-extend the value afterward. Other forms of address
937 computation below are also more natural to compute as 64-bit quantities.
938 If we've been given an SImode destination register, change it. */
939 if (GET_MODE (dest) != Pmode)
940 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
941 byte_lowpart_offset (Pmode, GET_MODE (dest)));
945 if (small_addr_symbolic_operand (src, VOIDmode))
949 emit_insn (gen_load_gprel64 (dest, src));
950 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
951 emit_insn (gen_load_fptr (dest, src));
952 else if (sdata_symbolic_operand (src, VOIDmode))
953 emit_insn (gen_load_gprel (dest, src));
956 HOST_WIDE_INT addend = 0;
959 /* We did split constant offsets in ia64_expand_move, and we did try
960 to keep them split in move_operand, but we also allowed reload to
961 rematerialize arbitrary constants rather than spill the value to
962 the stack and reload it. So we have to be prepared here to split them wherever we find them.  */
964 if (GET_CODE (src) == CONST)
966 HOST_WIDE_INT hi, lo;
968 hi = INTVAL (XEXP (XEXP (src, 0), 1));
969 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
975 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
979 tmp = gen_rtx_HIGH (Pmode, src);
980 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
981 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
983 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
984 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
988 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
989 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
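/* The hi/lo split used here (and again in ia64_expand_tls_address and
   ia64_expand_move) sign-extends the low 14 bits of the addend so that it
   fits a signed 14-bit add immediate, and folds the remainder into the
   symbolic part.  A standalone arithmetic check (illustrative only):

     #include <stdio.h>

     int main (void)
     {
       long addend = 0x12345;                             // arbitrary offset
       long lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;   // -0x1cbb
       long hi = addend - lo;                             //  0x14000
       printf ("hi=%ld lo=%ld hi+lo=%ld\n", hi, lo, hi + lo);
       return 0;
     }

   LO always lies in [-0x2000, 0x1fff] and HI is a multiple of 0x4000, so
   HI + LO reproduces the original addend exactly.  */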
996 static GTY(()) rtx gen_tls_tga;
998 gen_tls_get_addr (void)
1001 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1005 static GTY(()) rtx thread_pointer_rtx;
1007 gen_thread_pointer (void)
1009 if (!thread_pointer_rtx)
1010 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1011 return thread_pointer_rtx;
1015 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1016 rtx orig_op1, HOST_WIDE_INT addend)
1018 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1020 HOST_WIDE_INT addend_lo, addend_hi;
1024 case TLS_MODEL_GLOBAL_DYNAMIC:
1027 tga_op1 = gen_reg_rtx (Pmode);
1028 emit_insn (gen_load_dtpmod (tga_op1, op1));
1030 tga_op2 = gen_reg_rtx (Pmode);
1031 emit_insn (gen_load_dtprel (tga_op2, op1));
1033 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1034 LCT_CONST, Pmode, 2, tga_op1,
1035 Pmode, tga_op2, Pmode);
1037 insns = get_insns ();
1040 if (GET_MODE (op0) != Pmode)
1042 emit_libcall_block (insns, op0, tga_ret, op1);
1045 case TLS_MODEL_LOCAL_DYNAMIC:
1046 /* ??? This isn't the completely proper way to do local-dynamic.
1047 If the call to __tls_get_addr is used only by a single symbol,
1048 then we should (somehow) move the dtprel to the second arg
1049 to avoid the extra add. */
1052 tga_op1 = gen_reg_rtx (Pmode);
1053 emit_insn (gen_load_dtpmod (tga_op1, op1));
1055 tga_op2 = const0_rtx;
1057 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1058 LCT_CONST, Pmode, 2, tga_op1,
1059 Pmode, tga_op2, Pmode);
1061 insns = get_insns ();
1064 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1066 tmp = gen_reg_rtx (Pmode);
1067 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1069 if (!register_operand (op0, Pmode))
1070 op0 = gen_reg_rtx (Pmode);
1073 emit_insn (gen_load_dtprel (op0, op1));
1074 emit_insn (gen_adddi3 (op0, tmp, op0));
1077 emit_insn (gen_add_dtprel (op0, op1, tmp));
1080 case TLS_MODEL_INITIAL_EXEC:
1081 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1082 addend_hi = addend - addend_lo;
1084 op1 = plus_constant (op1, addend_hi);
1087 tmp = gen_reg_rtx (Pmode);
1088 emit_insn (gen_load_tprel (tmp, op1));
1090 if (!register_operand (op0, Pmode))
1091 op0 = gen_reg_rtx (Pmode);
1092 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1095 case TLS_MODEL_LOCAL_EXEC:
1096 if (!register_operand (op0, Pmode))
1097 op0 = gen_reg_rtx (Pmode);
1103 emit_insn (gen_load_tprel (op0, op1));
1104 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1107 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1115 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1116 orig_op0, 1, OPTAB_DIRECT);
1117 if (orig_op0 == op0)
1119 if (GET_MODE (orig_op0) == Pmode)
1121 return gen_lowpart (GET_MODE (orig_op0), op0);
1125 ia64_expand_move (rtx op0, rtx op1)
1127 enum machine_mode mode = GET_MODE (op0);
1129 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1130 op1 = force_reg (mode, op1);
1132 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1134 HOST_WIDE_INT addend = 0;
1135 enum tls_model tls_kind;
1138 if (GET_CODE (op1) == CONST
1139 && GET_CODE (XEXP (op1, 0)) == PLUS
1140 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1142 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1143 sym = XEXP (XEXP (op1, 0), 0);
1146 tls_kind = tls_symbolic_operand_type (sym);
1148 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1150 if (any_offset_symbol_operand (sym, mode))
1152 else if (aligned_offset_symbol_operand (sym, mode))
1154 HOST_WIDE_INT addend_lo, addend_hi;
1156 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1157 addend_hi = addend - addend_lo;
1161 op1 = plus_constant (sym, addend_hi);
1170 if (reload_completed)
1172 /* We really should have taken care of this offset earlier. */
1173 gcc_assert (addend == 0);
1174 if (ia64_expand_load_address (op0, op1))
1180 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1182 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1184 op1 = expand_simple_binop (mode, PLUS, subtarget,
1185 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1194 /* Split a move from OP1 to OP0 conditional on COND. */
1197 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1199 rtx insn, first = get_last_insn ();
1201 emit_move_insn (op0, op1);
1203 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1205 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1209 /* Split a post-reload TImode or TFmode reference into two DImode
1210 components. This is made extra difficult by the fact that we do
1211 not get any scratch registers to work with, because reload cannot
1212 be prevented from giving us a scratch that overlaps the register
1213 pair involved. So instead, when addressing memory, we tweak the
1214 pointer register up and back down with POST_INCs. Or up and not
1215 back down when we can get away with it.
1217 REVERSED is true when the loads must be done in reversed order
1218 (high word first) for correctness. DEAD is true when the pointer
1219 dies with the second insn we generate and therefore the second
1220 address must not carry a postmodify.
1222 May return an insn which is to be emitted after the moves. */
1225 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1229 switch (GET_CODE (in))
1232 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1233 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1238 /* Cannot occur reversed. */
1239 gcc_assert (!reversed);
1241 if (GET_MODE (in) != TFmode)
1242 split_double (in, &out[0], &out[1]);
1244 /* split_double does not understand how to split a TFmode
1245 quantity into a pair of DImode constants. */
1248 unsigned HOST_WIDE_INT p[2];
1249 long l[4]; /* TFmode is 128 bits */
1251 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1252 real_to_target (l, &r, TFmode);
1254 if (FLOAT_WORDS_BIG_ENDIAN)
1256 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1257 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1261 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1262 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1264 out[0] = GEN_INT (p[0]);
1265 out[1] = GEN_INT (p[1]);
1271 rtx base = XEXP (in, 0);
1274 switch (GET_CODE (base))
1279 out[0] = adjust_automodify_address
1280 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1281 out[1] = adjust_automodify_address
1282 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1286 /* Reversal requires a pre-increment, which can only
1287 be done as a separate insn. */
1288 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1289 out[0] = adjust_automodify_address
1290 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1291 out[1] = adjust_address (in, DImode, 0);
1296 gcc_assert (!reversed && !dead);
1298 /* Just do the increment in two steps. */
1299 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1300 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1304 gcc_assert (!reversed && !dead);
1306 /* Add 8, subtract 24. */
1307 base = XEXP (base, 0);
1308 out[0] = adjust_automodify_address
1309 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1310 out[1] = adjust_automodify_address
1312 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1317 gcc_assert (!reversed && !dead);
1319 /* Extract and adjust the modification. This case is
1320 trickier than the others, because we might have an
1321 index register, or we might have a combined offset that
1322 doesn't fit a signed 9-bit displacement field. We can
1323 assume the incoming expression is already legitimate. */
1324 offset = XEXP (base, 1);
1325 base = XEXP (base, 0);
1327 out[0] = adjust_automodify_address
1328 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1330 if (GET_CODE (XEXP (offset, 1)) == REG)
1332 /* Can't adjust the postmodify to match. Emit the
1333 original, then a separate addition insn. */
1334 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1335 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1339 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1340 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1342 /* Again the postmodify cannot be made to match,
1343 but in this case it's more efficient to get rid
1344 of the postmodify entirely and fix up with an offset.  */
1346 out[1] = adjust_automodify_address (in, DImode, base, 8);
1348 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1352 /* Combined offset still fits in the displacement field.
1353 (We cannot overflow it at the high end.) */
1354 out[1] = adjust_automodify_address
1355 (in, DImode, gen_rtx_POST_MODIFY
1356 (Pmode, base, gen_rtx_PLUS
1358 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1377 /* Split a TImode or TFmode move instruction after reload.
1378 This is used by *movtf_internal and *movti_internal. */
1380 ia64_split_tmode_move (rtx operands[])
1382 rtx in[2], out[2], insn;
1385 bool reversed = false;
1387 /* It is possible for reload to decide to overwrite a pointer with
1388 the value it points to. In that case we have to do the loads in
1389 the appropriate order so that the pointer is not destroyed too
1390 early. Also we must not generate a postmodify for that second
1391 load, or rws_access_regno will die. */
1392 if (GET_CODE (operands[1]) == MEM
1393 && reg_overlap_mentioned_p (operands[0], operands[1]))
1395 rtx base = XEXP (operands[1], 0);
1396 while (GET_CODE (base) != REG)
1397 base = XEXP (base, 0);
1399 if (REGNO (base) == REGNO (operands[0]))
1403 /* Another reason to do the moves in reversed order is if the first
1404 element of the target register pair is also the second element of
1405 the source register pair. */
1406 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1407 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1410 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1411 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1413 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1414 if (GET_CODE (EXP) == MEM \
1415 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1416 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1417 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1418 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1420 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1421 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1422 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1424 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1425 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1426 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1429 emit_insn (fixup[0]);
1431 emit_insn (fixup[1]);
1433 #undef MAYBE_ADD_REG_INC_NOTE
1436 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1437 through memory plus an extra GR scratch register. Except that you can
1438 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1439 SECONDARY_RELOAD_CLASS, but not both.
1441 We got into problems in the first place by allowing a construct like
1442 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1443 This solution attempts to prevent this situation from occurring. When
1444 we see something like the above, we spill the inner register to memory. */
1447 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1449 if (GET_CODE (in) == SUBREG
1450 && GET_MODE (SUBREG_REG (in)) == TImode
1451 && GET_CODE (SUBREG_REG (in)) == REG)
1453 rtx memt = assign_stack_temp (TImode, 16, 0);
1454 emit_move_insn (memt, SUBREG_REG (in));
1455 return adjust_address (memt, mode, 0);
1457 else if (force && GET_CODE (in) == REG)
1459 rtx memx = assign_stack_temp (mode, 16, 0);
1460 emit_move_insn (memx, in);
1467 /* Expand the movxf or movrf pattern (MODE says which) with the given
1468 OPERANDS, returning true if the pattern should then invoke
1472 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1474 rtx op0 = operands[0];
1476 if (GET_CODE (op0) == SUBREG)
1477 op0 = SUBREG_REG (op0);
1479 /* We must support XFmode loads into general registers for stdarg/vararg,
1480 unprototyped calls, and a rare case where a long double is passed as
1481 an argument after a float HFA fills the FP registers. We split them into
1482 DImode loads for convenience. We also need to support XFmode stores
1483 for the last case. This case does not happen for stdarg/vararg routines,
1484 because we do a block store to memory of unnamed arguments. */
1486 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1490 /* We're hoping to transform everything that deals with XFmode
1491 quantities and GR registers early in the compiler. */
1492 gcc_assert (can_create_pseudo_p ());
1494 /* Struct to register can just use TImode instead. */
1495 if ((GET_CODE (operands[1]) == SUBREG
1496 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1497 || (GET_CODE (operands[1]) == REG
1498 && GR_REGNO_P (REGNO (operands[1]))))
1500 rtx op1 = operands[1];
1502 if (GET_CODE (op1) == SUBREG)
1503 op1 = SUBREG_REG (op1);
1505 op1 = gen_rtx_REG (TImode, REGNO (op1));
1507 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1511 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1513 /* Don't word-swap when reading in the constant. */
1514 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1515 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1517 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1518 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1523 /* If the quantity is in a register not known to be GR, spill it. */
1524 if (register_operand (operands[1], mode))
1525 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1527 gcc_assert (GET_CODE (operands[1]) == MEM);
1529 /* Don't word-swap when reading in the value. */
1530 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1531 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1533 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1534 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1538 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1540 /* We're hoping to transform everything that deals with XFmode
1541 quantities and GR registers early in the compiler. */
1542 gcc_assert (can_create_pseudo_p ());
1544 /* Op0 can't be a GR_REG here, as that case is handled above.
1545 If op0 is a register, then we spill op1, so that we now have a
1546 MEM operand. This requires creating an XFmode subreg of a TImode reg
1547 to force the spill. */
1548 if (register_operand (operands[0], mode))
1550 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1551 op1 = gen_rtx_SUBREG (mode, op1, 0);
1552 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1559 gcc_assert (GET_CODE (operands[0]) == MEM);
1561 /* Don't word-swap when writing out the value. */
1562 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1563 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1565 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1566 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1571 if (!reload_in_progress && !reload_completed)
1573 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1575 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1577 rtx memt, memx, in = operands[1];
1578 if (CONSTANT_P (in))
1579 in = validize_mem (force_const_mem (mode, in));
1580 if (GET_CODE (in) == MEM)
1581 memt = adjust_address (in, TImode, 0);
1584 memt = assign_stack_temp (TImode, 16, 0);
1585 memx = adjust_address (memt, mode, 0);
1586 emit_move_insn (memx, in);
1588 emit_move_insn (op0, memt);
1592 if (!ia64_move_ok (operands[0], operands[1]))
1593 operands[1] = force_reg (mode, operands[1]);
1599 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600 with the expression that holds the compare result (in VOIDmode). */
1602 static GTY(()) rtx cmptf_libfunc;
1605 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1607 enum rtx_code code = GET_CODE (*expr);
1610 /* If we have a BImode input, then we already have a compare result, and
1611 do not need to emit another comparison. */
1612 if (GET_MODE (*op0) == BImode)
1614 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1617 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1618 magic number as its third argument that indicates what to do.
1619 The return value is an integer to be compared against zero. */
1620 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1623 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1630 enum rtx_code ncode;
1633 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1636 /* 1 = equal, 0 = not equal. Equality operators do
1637 not raise FP_INVALID when given an SNaN operand. */
1638 case EQ: magic = QCMP_EQ; ncode = NE; break;
1639 case NE: magic = QCMP_EQ; ncode = EQ; break;
1640 /* isunordered() from C99. */
1641 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1642 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1643 /* Relational operators raise FP_INVALID when given an SNaN operand.  */
1645 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1646 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1647 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1648 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1649 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1650 Expanders for buneq etc. would have to be added to ia64.md
1651 for this to be useful. */
1652 default: gcc_unreachable ();
1657 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1658 *op0, TFmode, *op1, TFmode,
1659 GEN_INT (magic), DImode);
1660 cmp = gen_reg_rtx (BImode);
1661 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1662 gen_rtx_fmt_ee (ncode, BImode,
1665 insns = get_insns ();
1668 emit_libcall_block (insns, cmp, cmp,
1669 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1674 cmp = gen_reg_rtx (BImode);
1675 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1676 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1680 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
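/* Concretely, the mapping above means, for example (illustrative only):

     a <  b   becomes   _U_Qfcmp (a, b, QCMP_LT | QCMP_INV) != 0
     a != b   becomes   _U_Qfcmp (a, b, QCMP_EQ) == 0

   i.e. the library call returns nonzero when the queried relation holds,
   and the NCODE choice of EQ vs. NE turns that back into the original
   predicate.  */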
1685 /* Generate an integral vector comparison. Return true if the condition has
1686 been reversed, and so the sense of the comparison should be inverted. */
1689 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1690 rtx dest, rtx op0, rtx op1)
1692 bool negate = false;
1695 /* Canonicalize the comparison to EQ, GT, GTU. */
1706 code = reverse_condition (code);
1712 code = reverse_condition (code);
1718 code = swap_condition (code);
1719 x = op0, op0 = op1, op1 = x;
1726 /* Unsigned parallel compare is not supported by the hardware. Play some
1727 tricks to turn this into a signed comparison against 0. */
1736 /* Subtract (-(INT MAX) - 1) from both operands to make them signed.  */
1738 mask = GEN_INT (0x80000000);
1739 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1740 mask = force_reg (mode, mask);
1741 t1 = gen_reg_rtx (mode);
1742 emit_insn (gen_subv2si3 (t1, op0, mask));
1743 t2 = gen_reg_rtx (mode);
1744 emit_insn (gen_subv2si3 (t2, op1, mask));
1753 /* Perform a parallel unsigned saturating subtraction. */
1754 x = gen_reg_rtx (mode);
1755 emit_insn (gen_rtx_SET (VOIDmode, x,
1756 gen_rtx_US_MINUS (mode, op0, op1)));
1760 op1 = CONST0_RTX (mode);
1769 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1770 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
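/* Scalar models of the two GTU tricks used above (illustrative only):

     #include <stdint.h>

     // V2SI path: flipping the sign bit of both operands (subtracting
     // 0x80000000 modulo 2^32) turns an unsigned compare into a signed one.
     static int gtu_via_bias (uint32_t a, uint32_t b)
     {
       return (int32_t) (a - 0x80000000u) > (int32_t) (b - 0x80000000u);
     }

     // Other modes: an unsigned saturating subtraction is nonzero exactly
     // when a > b, so GTU becomes "(a -us b) != 0".
     static int gtu_via_satsub (uint16_t a, uint16_t b)
     {
       uint16_t d = a > b ? (uint16_t) (a - b) : 0;   // element-wise US_MINUS
       return d != 0;
     }
*/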
1775 /* Emit an integral vector conditional move. */
1778 ia64_expand_vecint_cmov (rtx operands[])
1780 enum machine_mode mode = GET_MODE (operands[0]);
1781 enum rtx_code code = GET_CODE (operands[3]);
1785 cmp = gen_reg_rtx (mode);
1786 negate = ia64_expand_vecint_compare (code, mode, cmp,
1787 operands[4], operands[5]);
1789 ot = operands[1+negate];
1790 of = operands[2-negate];
1792 if (ot == CONST0_RTX (mode))
1794 if (of == CONST0_RTX (mode))
1796 emit_move_insn (operands[0], ot);
1800 x = gen_rtx_NOT (mode, cmp);
1801 x = gen_rtx_AND (mode, x, of);
1802 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1804 else if (of == CONST0_RTX (mode))
1806 x = gen_rtx_AND (mode, cmp, ot);
1807 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1813 t = gen_reg_rtx (mode);
1814 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1815 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1817 f = gen_reg_rtx (mode);
1818 x = gen_rtx_NOT (mode, cmp);
1819 x = gen_rtx_AND (mode, x, operands[2-negate]);
1820 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1822 x = gen_rtx_IOR (mode, t, f);
1823 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
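/* The general case above is the usual branch-free blend.  Scalar model
   (illustrative only), where CMP is all-ones in the lanes where the
   comparison held and all-zeros elsewhere:

     static unsigned vec_blend (unsigned cmp, unsigned t, unsigned f)
     {
       return (cmp & t) | (~cmp & f);
     }

   The CONST0_RTX special cases above simply drop whichever half of the
   blend is known to be zero.  */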
1827 /* Emit an integral vector min or max operation. Return true if all done. */
1830 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1835 /* These four combinations are supported directly. */
1836 if (mode == V8QImode && (code == UMIN || code == UMAX))
1838 if (mode == V4HImode && (code == SMIN || code == SMAX))
1841 /* This combination can be implemented with only saturating subtraction. */
1842 if (mode == V4HImode && code == UMAX)
1844 rtx x, tmp = gen_reg_rtx (mode);
1846 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1849 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1853 /* Everything else implemented via vector comparisons. */
1854 xops[0] = operands[0];
1855 xops[4] = xops[1] = operands[1];
1856 xops[5] = xops[2] = operands[2];
1875 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1877 ia64_expand_vecint_cmov (xops);
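/* The V4HI UMAX special case above relies on the identity
   max (a, b) == (a -us b) + b.  Scalar check (illustrative only):

     static unsigned short umax_via_satsub (unsigned short a, unsigned short b)
     {
       unsigned short d = a > b ? (unsigned short) (a - b) : 0;  // saturating subtract
       return (unsigned short) (d + b);   // a when a > b, otherwise b
     }
*/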
1881 /* Emit an integral vector widening sum operation. */
1884 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1887 enum machine_mode wmode, mode;
1888 rtx (*unpack_l) (rtx, rtx, rtx);
1889 rtx (*unpack_h) (rtx, rtx, rtx);
1890 rtx (*plus) (rtx, rtx, rtx);
1892 wmode = GET_MODE (operands[0]);
1893 mode = GET_MODE (operands[1]);
1898 unpack_l = gen_unpack1_l;
1899 unpack_h = gen_unpack1_h;
1900 plus = gen_addv4hi3;
1903 unpack_l = gen_unpack2_l;
1904 unpack_h = gen_unpack2_h;
1905 plus = gen_addv2si3;
1911 /* Fill in x with the sign extension of each element in op1. */
1913 x = CONST0_RTX (mode);
1918 x = gen_reg_rtx (mode);
1920 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1925 l = gen_reg_rtx (wmode);
1926 h = gen_reg_rtx (wmode);
1927 s = gen_reg_rtx (wmode);
1929 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931 emit_insn (plus (s, l, operands[2]));
1932 emit_insn (plus (operands[0], h, s));
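/* A scalar reference model of the pattern built above (illustrative only).
   The unpack_l/unpack_h pair widens the low and high halves of operands[1]
   (sign- or zero-extending against X), and the two adds fold them into the
   wide accumulator.  Which wide lane receives which narrow element is an
   implementation detail; the sum over all lanes is what matters for the
   reductions this expander serves:

     static void widen_sum_v8qi (short acc[4], const signed char in[8])
     {
       int i;
       for (i = 0; i < 8; i++)
         acc[i % 4] += in[i];
     }
*/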
1935 /* Emit a signed or unsigned V8QI dot product operation. */
1938 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1940 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1942 /* Fill in x1 and x2 with the sign extension of each element. */
1944 x1 = x2 = CONST0_RTX (V8QImode);
1949 x1 = gen_reg_rtx (V8QImode);
1950 x2 = gen_reg_rtx (V8QImode);
1952 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1953 CONST0_RTX (V8QImode));
1955 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1956 CONST0_RTX (V8QImode));
1960 l1 = gen_reg_rtx (V4HImode);
1961 l2 = gen_reg_rtx (V4HImode);
1962 h1 = gen_reg_rtx (V4HImode);
1963 h2 = gen_reg_rtx (V4HImode);
1965 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1966 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1967 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1968 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1970 p1 = gen_reg_rtx (V2SImode);
1971 p2 = gen_reg_rtx (V2SImode);
1972 p3 = gen_reg_rtx (V2SImode);
1973 p4 = gen_reg_rtx (V2SImode);
1974 emit_insn (gen_pmpy2_r (p1, l1, l2));
1975 emit_insn (gen_pmpy2_l (p2, l1, l2));
1976 emit_insn (gen_pmpy2_r (p3, h1, h2));
1977 emit_insn (gen_pmpy2_l (p4, h1, h2));
1979 s1 = gen_reg_rtx (V2SImode);
1980 s2 = gen_reg_rtx (V2SImode);
1981 s3 = gen_reg_rtx (V2SImode);
1982 emit_insn (gen_addv2si3 (s1, p1, p2));
1983 emit_insn (gen_addv2si3 (s2, p3, p4));
1984 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1985 emit_insn (gen_addv2si3 (operands[0], s2, s3));
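/* Likewise, a scalar reference model for the sequence above (illustrative
   only): eight widened products are folded into the two V2SI accumulator
   lanes, and only the total across the lanes is significant to the
   reduction that uses this pattern:

     static void dot_prod_v8qi (int acc[2], const signed char a[8],
                                const signed char b[8])
     {
       int i;
       for (i = 0; i < 8; i++)
         acc[i % 2] += a[i] * b[i];
     }
*/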
1988 /* Emit the appropriate sequence for a call. */
1991 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1996 addr = XEXP (addr, 0);
1997 addr = convert_memory_address (DImode, addr);
1998 b0 = gen_rtx_REG (DImode, R_BR (0));
2000 /* ??? Should do this for functions known to bind local too. */
2001 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2004 insn = gen_sibcall_nogp (addr);
2006 insn = gen_call_nogp (addr, b0);
2008 insn = gen_call_value_nogp (retval, addr, b0);
2009 insn = emit_call_insn (insn);
2014 insn = gen_sibcall_gp (addr);
2016 insn = gen_call_gp (addr, b0);
2018 insn = gen_call_value_gp (retval, addr, b0);
2019 insn = emit_call_insn (insn);
2021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2027 if (TARGET_ABI_OPEN_VMS)
2028 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2029 gen_rtx_REG (DImode, GR_REG (25)));
2033 reg_emitted (enum ia64_frame_regs r)
2035 if (emitted_frame_related_regs[r] == 0)
2036 emitted_frame_related_regs[r] = current_frame_info.r[r];
2038 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2042 get_reg (enum ia64_frame_regs r)
2045 return current_frame_info.r[r];
2049 is_emitted (int regno)
2053 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2054 if (emitted_frame_related_regs[r] == regno)
2060 ia64_reload_gp (void)
2064 if (current_frame_info.r[reg_save_gp])
2066 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2070 HOST_WIDE_INT offset;
2073 offset = (current_frame_info.spill_cfa_off
2074 + current_frame_info.spill_size);
2075 if (frame_pointer_needed)
2077 tmp = hard_frame_pointer_rtx;
2082 tmp = stack_pointer_rtx;
2083 offset = current_frame_info.total_size - offset;
2086 offset_r = GEN_INT (offset);
2087 if (satisfies_constraint_I (offset_r))
2088 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2091 emit_move_insn (pic_offset_table_rtx, offset_r);
2092 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2093 pic_offset_table_rtx, tmp));
2096 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2099 emit_move_insn (pic_offset_table_rtx, tmp);
2103 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2104 rtx scratch_b, int noreturn_p, int sibcall_p)
2107 bool is_desc = false;
2109 /* If we find we're calling through a register, then we're actually
2110 calling through a descriptor, so load up the values. */
2111 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2116 /* ??? We are currently constrained to *not* use peep2, because
2117 we can legitimately change the global lifetime of the GP
2118 (in the form of killing where previously live). This is
2119 because a call through a descriptor doesn't use the previous
2120 value of the GP, while a direct call does, and we do not
2121 commit to either form until the split here.
2123 That said, this means that we lack precise life info for
2124 whether ADDR is dead after this call. This is not terribly
2125 important, since we can fix things up essentially for free
2126 with the POST_DEC below, but it's nice to not use it when we
2127 can immediately tell it's not necessary. */
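  /* Background note (added for exposition): on ia64 the register here
     points at a function descriptor, a pair of 8-byte words

	 fptr[0]  entry address of the code
	 fptr[1]  gp value the callee expects

     which is why the code below loads the entry point through a
     post-incremented ADDR and then fetches the new gp from the second
     word.  */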
2128 addr_dead_p = ((noreturn_p || sibcall_p
2129 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2131 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2133 /* Load the code address into scratch_b. */
2134 tmp = gen_rtx_POST_INC (Pmode, addr);
2135 tmp = gen_rtx_MEM (Pmode, tmp);
2136 emit_move_insn (scratch_r, tmp);
2137 emit_move_insn (scratch_b, scratch_r);
2139 /* Load the GP address. If ADDR is not dead here, then we must
2140 revert the change made above via the POST_INCREMENT. */
2142 tmp = gen_rtx_POST_DEC (Pmode, addr);
2145 tmp = gen_rtx_MEM (Pmode, tmp);
2146 emit_move_insn (pic_offset_table_rtx, tmp);
2153 insn = gen_sibcall_nogp (addr);
2155 insn = gen_call_value_nogp (retval, addr, retaddr);
2157 insn = gen_call_nogp (addr, retaddr);
2158 emit_call_insn (insn);
2160 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2164 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2166 This differs from the generic code in that we know about the zero-extending
2167 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2168 also know that ld.acq+cmpxchg.rel equals a full barrier.
2170 The loop we want to generate looks like
2175 new_reg = cmp_reg op val;
2176 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2177 if (cmp_reg != old_reg)
2180 Note that we only do the plain load from memory once. Subsequent
2181 iterations use the value loaded by the compare-and-swap pattern. */
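/* Illustrative sketch (added for exposition, not part of the port): at the
   C level the generated loop behaves roughly like the following, written
   with GCC's __sync builtin purely to show the retry structure.  The
   function name is hypothetical.  */
#if 0
static long
atomic_op_sketch (volatile long *mem, long val)
{
  long cmp = *mem;		/* the single plain (ld.acq) load */
  long old;
  do
    {
      old = cmp;		/* old_reg = cmp_reg */
      cmp = __sync_val_compare_and_swap (mem, old, old + val);
    }
  while (cmp != old);		/* retry with the value the cmpxchg saw */
  return old;
}
#endif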
2184 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2185 rtx old_dst, rtx new_dst)
2187 enum machine_mode mode = GET_MODE (mem);
2188 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2189 enum insn_code icode;
2191 /* Special case for using fetchadd. */
2192 if ((mode == SImode || mode == DImode)
2193 && (code == PLUS || code == MINUS)
2194 && fetchadd_operand (val, mode))
2197 val = GEN_INT (-INTVAL (val));
2200 old_dst = gen_reg_rtx (mode);
2202 emit_insn (gen_memory_barrier ());
2205 icode = CODE_FOR_fetchadd_acq_si;
2207 icode = CODE_FOR_fetchadd_acq_di;
2208 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2212 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2214 if (new_reg != new_dst)
2215 emit_move_insn (new_dst, new_reg);
2220 /* Because of the volatile mem read, we get an ld.acq, which is the
2221 front half of the full barrier. The end half is the cmpxchg.rel. */
2222 gcc_assert (MEM_VOLATILE_P (mem));
2224 old_reg = gen_reg_rtx (DImode);
2225 cmp_reg = gen_reg_rtx (DImode);
2226 label = gen_label_rtx ();
2230 val = simplify_gen_subreg (DImode, val, mode, 0);
2231 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2234 emit_move_insn (cmp_reg, mem);
2238 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2239 emit_move_insn (old_reg, cmp_reg);
2240 emit_move_insn (ar_ccv, cmp_reg);
2243 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2248 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2249 true, OPTAB_DIRECT);
2250 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2253 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2254 true, OPTAB_DIRECT);
2257 new_reg = gen_lowpart (mode, new_reg);
2259 emit_move_insn (new_dst, new_reg);
2263 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2264 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2265 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2266 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2271 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2273 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2276 /* Begin the assembly file. */
2279 ia64_file_start (void)
2281 /* Variable tracking should be run after all optimizations which change order
2282 of insns. It also needs a valid CFG. This can't be done in
2283 ia64_override_options, because flag_var_tracking is finalized after
2285 ia64_flag_var_tracking = flag_var_tracking;
2286 flag_var_tracking = 0;
2288 default_file_start ();
2289 emit_safe_across_calls ();
2293 emit_safe_across_calls (void)
2295 unsigned int rs, re;
2302 while (rs < 64 && call_used_regs[PR_REG (rs)])
2306 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2310 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2314 fputc (',', asm_out_file);
2316 fprintf (asm_out_file, "p%u", rs);
2318 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2322 fputc ('\n', asm_out_file);
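/* Example (illustrative only): with the usual ia64 split of the predicate
   registers into scratch and preserved sets, the loop above typically
   emits

	.pred.safe_across_calls p1-p5,p16-p63
   */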
2325 /* Globalize a declaration. */
2328 ia64_globalize_decl_name (FILE * stream, tree decl)
2330 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2331 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2334 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2335 const char *p = TREE_STRING_POINTER (v);
2336 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2338 targetm.asm_out.globalize_label (stream, name);
2339 if (TREE_CODE (decl) == FUNCTION_DECL)
2340 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
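/* Example (illustrative only): for a function foo carrying
   __attribute__((version_id ("1.0"))), the code above emits

	.alias foo#, "foo{1.0}"

   before globalizing and typing the label.  */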
2343 /* Helper function for ia64_compute_frame_size: find an appropriate general
2344 register to spill some special register to. SPECIAL_SPILL_MASK contains
2345 bits in GR0 to GR31 that have already been allocated by this routine.
2346 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2349 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2353 if (emitted_frame_related_regs[r] != 0)
2355 regno = emitted_frame_related_regs[r];
2356 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2357 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2358 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359 else if (current_function_is_leaf
2360 && regno >= GR_REG (1) && regno <= GR_REG (31))
2361 current_frame_info.gr_used_mask |= 1 << regno;
2366 /* If this is a leaf function, first try an otherwise unused
2367 call-clobbered register. */
2368 if (current_function_is_leaf)
2370 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2371 if (! df_regs_ever_live_p (regno)
2372 && call_used_regs[regno]
2373 && ! fixed_regs[regno]
2374 && ! global_regs[regno]
2375 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2376 && ! is_emitted (regno))
2378 current_frame_info.gr_used_mask |= 1 << regno;
2385 regno = current_frame_info.n_local_regs;
2386 /* If there is a frame pointer, then we can't use loc79, because
2387 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2388 reg_name switching code in ia64_expand_prologue. */
2389 while (regno < (80 - frame_pointer_needed))
2390 if (! is_emitted (LOC_REG (regno++)))
2392 current_frame_info.n_local_regs = regno;
2393 return LOC_REG (regno - 1);
2397 /* Failed to find a general register to spill to. Must use stack. */
2401 /* In order to make for nice schedules, we try to allocate every temporary
2402 to a different register. We must of course stay away from call-saved,
2403 fixed, and global registers. We must also stay away from registers
2404 allocated in current_frame_info.gr_used_mask, since those include regs
2405 used all through the prologue.
2407 Any register allocated here must be used immediately. The idea is to
2408 aid scheduling, not to solve data flow problems. */
2410 static int last_scratch_gr_reg;
2413 next_scratch_gr_reg (void)
2417 for (i = 0; i < 32; ++i)
2419 regno = (last_scratch_gr_reg + i + 1) & 31;
2420 if (call_used_regs[regno]
2421 && ! fixed_regs[regno]
2422 && ! global_regs[regno]
2423 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2425 last_scratch_gr_reg = regno;
2430 /* There must be _something_ available. */
2434 /* Helper function for ia64_compute_frame_size, called through
2435 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2438 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2440 unsigned int regno = REGNO (reg);
2443 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2444 for (i = 0; i < n; ++i)
2445 current_frame_info.gr_used_mask |= 1 << (regno + i);
2450 /* Returns the number of bytes offset between the frame pointer and the stack
2451 pointer for the current function. SIZE is the number of bytes of space
2452 needed for local variables. */
2455 ia64_compute_frame_size (HOST_WIDE_INT size)
2457 HOST_WIDE_INT total_size;
2458 HOST_WIDE_INT spill_size = 0;
2459 HOST_WIDE_INT extra_spill_size = 0;
2460 HOST_WIDE_INT pretend_args_size;
2463 int spilled_gr_p = 0;
2464 int spilled_fr_p = 0;
2470 if (current_frame_info.initialized)
2473 memset (&current_frame_info, 0, sizeof current_frame_info);
2474 CLEAR_HARD_REG_SET (mask);
2476 /* Don't allocate scratches to the return register. */
2477 diddle_return_value (mark_reg_gr_used_mask, NULL);
2479 /* Don't allocate scratches to the EH scratch registers. */
2480 if (cfun->machine->ia64_eh_epilogue_sp)
2481 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482 if (cfun->machine->ia64_eh_epilogue_bsp)
2483 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2485 /* Find the size of the register stack frame. We have only 80 local
2486 registers, because we reserve 8 for the inputs and 8 for the
2489 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490 since we'll be adjusting that down later. */
2491 regno = LOC_REG (78) + ! frame_pointer_needed;
2492 for (; regno >= LOC_REG (0); regno--)
2493 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2495 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2497 /* For functions marked with the syscall_linkage attribute, we must mark
2498 all eight input registers as in use, so that locals aren't visible to
2501 if (cfun->machine->n_varargs > 0
2502 || lookup_attribute ("syscall_linkage",
2503 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2504 current_frame_info.n_input_regs = 8;
2507 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2508 if (df_regs_ever_live_p (regno))
2510 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2513 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2514 if (df_regs_ever_live_p (regno))
2516 i = regno - OUT_REG (0) + 1;
2518 #ifndef PROFILE_HOOK
2519 /* When -p profiling, we need one output register for the mcount argument.
2520 Likewise for -a profiling for the bb_init_func argument. For -ax
2521 profiling, we need two output registers for the two bb_init_trace_func
2526 current_frame_info.n_output_regs = i;
2528 /* ??? No rotating register support yet. */
2529 current_frame_info.n_rotate_regs = 0;
2531 /* Discover which registers need spilling, and how much room that
2532 will take. Begin with floating point and general registers,
2533 which will always wind up on the stack. */
2535 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2536 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2538 SET_HARD_REG_BIT (mask, regno);
2544 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2545 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2547 SET_HARD_REG_BIT (mask, regno);
2553 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2554 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2556 SET_HARD_REG_BIT (mask, regno);
2561 /* Now come all special registers that might get saved in other
2562 general registers. */
2564 if (frame_pointer_needed)
2566 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2567 /* If we did not get a register, then we take LOC79. This is guaranteed
2568 to be free, even if regs_ever_live is already set, because this is
2569 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2570 as we don't count loc79 above. */
2571 if (current_frame_info.r[reg_fp] == 0)
2573 current_frame_info.r[reg_fp] = LOC_REG (79);
2574 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2578 if (! current_function_is_leaf)
2580 /* Emit a save of BR0 if we call other functions. Do this even
2581 if this function doesn't return, as EH depends on this to be
2582 able to unwind the stack. */
2583 SET_HARD_REG_BIT (mask, BR_REG (0));
2585 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2586 if (current_frame_info.r[reg_save_b0] == 0)
2588 extra_spill_size += 8;
2592 /* Similarly for ar.pfs. */
2593 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2594 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2595 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2597 extra_spill_size += 8;
2601 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2602 registers are clobbered, so we fall back to the stack. */
2603 current_frame_info.r[reg_save_gp]
2604 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2605 if (current_frame_info.r[reg_save_gp] == 0)
2607 SET_HARD_REG_BIT (mask, GR_REG (1));
2614 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2616 SET_HARD_REG_BIT (mask, BR_REG (0));
2617 extra_spill_size += 8;
2621 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2623 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2624 current_frame_info.r[reg_save_ar_pfs]
2625 = find_gr_spill (reg_save_ar_pfs, 1);
2626 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2628 extra_spill_size += 8;
2634 /* Unwind descriptor hackery: things are most efficient if we allocate
2635 consecutive GR save registers for RP, PFS, FP in that order. However,
2636 it is absolutely critical that FP get the only hard register that's
2637 guaranteed to be free, so we allocated it first. If all three did
2638 happen to be allocated hard regs, and are consecutive, rearrange them
2639 into the preferred order now.
2641 If we have already emitted code for any of those registers,
2642 then it's already too late to change. */
2643 min_regno = MIN (current_frame_info.r[reg_fp],
2644 MIN (current_frame_info.r[reg_save_b0],
2645 current_frame_info.r[reg_save_ar_pfs]));
2646 max_regno = MAX (current_frame_info.r[reg_fp],
2647 MAX (current_frame_info.r[reg_save_b0],
2648 current_frame_info.r[reg_save_ar_pfs]));
2650 && min_regno + 2 == max_regno
2651 && (current_frame_info.r[reg_fp] == min_regno + 1
2652 || current_frame_info.r[reg_save_b0] == min_regno + 1
2653 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2654 && (emitted_frame_related_regs[reg_save_b0] == 0
2655 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2656 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2657 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2658 && (emitted_frame_related_regs[reg_fp] == 0
2659 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2661 current_frame_info.r[reg_save_b0] = min_regno;
2662 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2663 current_frame_info.r[reg_fp] = min_regno + 2;
2666 /* See if we need to store the predicate register block. */
2667 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2668 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2670 if (regno <= PR_REG (63))
2672 SET_HARD_REG_BIT (mask, PR_REG (0));
2673 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2674 if (current_frame_info.r[reg_save_pr] == 0)
2676 extra_spill_size += 8;
2680 /* ??? Mark them all as used so that register renaming and such
2681 are free to use them. */
2682 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2683 df_set_regs_ever_live (regno, true);
2686 /* If we're forced to use st8.spill, we're forced to save and restore
2687 ar.unat as well. The check for existing liveness allows inline asm
2688 to touch ar.unat. */
2689 if (spilled_gr_p || cfun->machine->n_varargs
2690 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2692 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2693 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2694 current_frame_info.r[reg_save_ar_unat]
2695 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2696 if (current_frame_info.r[reg_save_ar_unat] == 0)
2698 extra_spill_size += 8;
2703 if (df_regs_ever_live_p (AR_LC_REGNUM))
2705 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2706 current_frame_info.r[reg_save_ar_lc]
2707 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2708 if (current_frame_info.r[reg_save_ar_lc] == 0)
2710 extra_spill_size += 8;
2715 /* If we have an odd number of words of pretend arguments written to
2716 the stack, then the FR save area will be unaligned. We round the
2717 size of this area up to keep things 16 byte aligned. */
2719 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2721 pretend_args_size = crtl->args.pretend_args_size;
2723 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2724 + crtl->outgoing_args_size);
2725 total_size = IA64_STACK_ALIGN (total_size);
2727 /* We always use the 16-byte scratch area provided by the caller, but
2728 if we are a leaf function, there's no one to which we need to provide
2730 if (current_function_is_leaf)
2731 total_size = MAX (0, total_size - 16);
2733 current_frame_info.total_size = total_size;
2734 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2735 current_frame_info.spill_size = spill_size;
2736 current_frame_info.extra_spill_size = extra_spill_size;
2737 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2738 current_frame_info.n_spilled = n_spilled;
2739 current_frame_info.initialized = reload_completed;
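/* Worked example (added for exposition, numbers hypothetical): a non-leaf
   function with SIZE = 40 bytes of locals, 16 bytes of spills, 8 bytes of
   extra spill, no pretend args and no outgoing args gets

     total_size = IA64_STACK_ALIGN (16 + 8 + 40 + 0 + 0) = 64

   while the same frame in a leaf function drops the caller-provided
   16-byte scratch area, giving total_size = 48.  */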
2742 /* Worker function for TARGET_CAN_ELIMINATE. */
2745 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2747 return (to == BR_REG (0) ? current_function_is_leaf : true);
2750 /* Compute the initial difference between the specified pair of registers. */
2753 ia64_initial_elimination_offset (int from, int to)
2755 HOST_WIDE_INT offset;
2757 ia64_compute_frame_size (get_frame_size ());
2760 case FRAME_POINTER_REGNUM:
2763 case HARD_FRAME_POINTER_REGNUM:
2764 if (current_function_is_leaf)
2765 offset = -current_frame_info.total_size;
2767 offset = -(current_frame_info.total_size
2768 - crtl->outgoing_args_size - 16);
2771 case STACK_POINTER_REGNUM:
2772 if (current_function_is_leaf)
2775 offset = 16 + crtl->outgoing_args_size;
2783 case ARG_POINTER_REGNUM:
2784 /* Arguments start above the 16 byte save area, unless stdarg,
2785 in which case we store through the 16 byte save area. */
2788 case HARD_FRAME_POINTER_REGNUM:
2789 offset = 16 - crtl->args.pretend_args_size;
2792 case STACK_POINTER_REGNUM:
2793 offset = (current_frame_info.total_size
2794 + 16 - crtl->args.pretend_args_size);
2809 /* If there are more than a trivial number of register spills, we use
2810 two interleaved iterators so that we can get two memory references
2813 In order to simplify things in the prologue and epilogue expanders,
2814 we use helper functions to fix up the memory references after the
2815 fact with the appropriate offsets to a POST_MODIFY memory mode.
2816 The following data structure tracks the state of the two iterators
2817 while insns are being emitted. */
2819 struct spill_fill_data
2821 rtx init_after; /* point at which to emit initializations */
2822 rtx init_reg[2]; /* initial base register */
2823 rtx iter_reg[2]; /* the iterator registers */
2824 rtx *prev_addr[2]; /* address of last memory use */
2825 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2826 HOST_WIDE_INT prev_off[2]; /* last offset */
2827 int n_iter; /* number of iterators in use */
2828 int next_iter; /* next iterator to use */
2829 unsigned int save_gr_used_mask;
2832 static struct spill_fill_data spill_fill_data;
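/* Illustrative sketch (added for exposition, register numbers hypothetical):
   with two iterators the spills alternate between them, so four 8-byte GR
   saves starting at the bottom of the save area come out roughly as

	st8.spill [r2] = r4, 16		// iterator 0, post-increment past
	st8.spill [r3] = r5, 16		// the slot iterator 1 will use
	st8.spill [r2] = r6, 16
	st8.spill [r3] = r7

   giving two independent memory streams for the scheduler to interleave.  */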
2835 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2839 spill_fill_data.init_after = get_last_insn ();
2840 spill_fill_data.init_reg[0] = init_reg;
2841 spill_fill_data.init_reg[1] = init_reg;
2842 spill_fill_data.prev_addr[0] = NULL;
2843 spill_fill_data.prev_addr[1] = NULL;
2844 spill_fill_data.prev_insn[0] = NULL;
2845 spill_fill_data.prev_insn[1] = NULL;
2846 spill_fill_data.prev_off[0] = cfa_off;
2847 spill_fill_data.prev_off[1] = cfa_off;
2848 spill_fill_data.next_iter = 0;
2849 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2851 spill_fill_data.n_iter = 1 + (n_spills > 2);
2852 for (i = 0; i < spill_fill_data.n_iter; ++i)
2854 int regno = next_scratch_gr_reg ();
2855 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2856 current_frame_info.gr_used_mask |= 1 << regno;
2861 finish_spill_pointers (void)
2863 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2867 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2869 int iter = spill_fill_data.next_iter;
2870 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2871 rtx disp_rtx = GEN_INT (disp);
2874 if (spill_fill_data.prev_addr[iter])
2876 if (satisfies_constraint_N (disp_rtx))
2878 *spill_fill_data.prev_addr[iter]
2879 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2880 gen_rtx_PLUS (DImode,
2881 spill_fill_data.iter_reg[iter],
2883 add_reg_note (spill_fill_data.prev_insn[iter],
2884 REG_INC, spill_fill_data.iter_reg[iter]);
2888 /* ??? Could use register post_modify for loads. */
2889 if (!satisfies_constraint_I (disp_rtx))
2891 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2892 emit_move_insn (tmp, disp_rtx);
2895 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2896 spill_fill_data.iter_reg[iter], disp_rtx));
2899 /* Micro-optimization: if we've created a frame pointer, it's at
2900 CFA 0, which may allow the real iterator to be initialized lower,
2901 slightly increasing parallelism. Also, if there are few saves
2902 it may eliminate the iterator entirely. */
2904 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905 && frame_pointer_needed)
2907 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2908 set_mem_alias_set (mem, get_varargs_alias_set ());
2916 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917 spill_fill_data.init_reg[iter]);
2922 if (!satisfies_constraint_I (disp_rtx))
2924 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925 emit_move_insn (tmp, disp_rtx);
2929 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930 spill_fill_data.init_reg[iter],
2937 /* Be careful in case this is the first insn in a sequence. */
2938 if (spill_fill_data.init_after)
2939 insn = emit_insn_after (seq, spill_fill_data.init_after);
2942 rtx first = get_insns ();
2944 insn = emit_insn_before (seq, first);
2946 insn = emit_insn (seq);
2948 spill_fill_data.init_after = insn;
2951 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2953 /* ??? Not all of the spills are for varargs, but some of them are.
2954 The rest of the spills belong in an alias set of their own. But
2955 it doesn't actually hurt to include them here. */
2956 set_mem_alias_set (mem, get_varargs_alias_set ());
2958 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959 spill_fill_data.prev_off[iter] = cfa_off;
2961 if (++iter >= spill_fill_data.n_iter)
2963 spill_fill_data.next_iter = iter;
2969 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2972 int iter = spill_fill_data.next_iter;
2975 mem = spill_restore_mem (reg, cfa_off);
2976 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2977 spill_fill_data.prev_insn[iter] = insn;
2984 RTX_FRAME_RELATED_P (insn) = 1;
2986 /* Don't even pretend that the unwind code can intuit its way
2987 through a pair of interleaved post_modify iterators. Just
2988 provide the correct answer. */
2990 if (frame_pointer_needed)
2992 base = hard_frame_pointer_rtx;
2997 base = stack_pointer_rtx;
2998 off = current_frame_info.total_size - cfa_off;
3001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002 gen_rtx_SET (VOIDmode,
3003 gen_rtx_MEM (GET_MODE (reg),
3004 plus_constant (base, off)),
3010 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3012 int iter = spill_fill_data.next_iter;
3015 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016 GEN_INT (cfa_off)));
3017 spill_fill_data.prev_insn[iter] = insn;
3020 /* Wrapper functions that discard the CONST_INT spill offset. These
3021 exist so that we can give gr_spill/gr_fill the offset they need and
3022 use a consistent function interface. */
3025 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3027 return gen_movdi (dest, src);
3031 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3033 return gen_fr_spill (dest, src);
3037 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3039 return gen_fr_restore (dest, src);
3042 /* Called after register allocation to add any instructions needed for the
3043 prologue. Using a prologue insn is favored compared to putting all of the
3044 instructions in output_function_prologue(), since it allows the scheduler
3045 to intermix instructions with the saves of the caller saved registers. In
3046 some cases, it might be necessary to emit a barrier instruction as the last
3047 insn to prevent such scheduling.
3049 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050 so that the debug info generation code can handle them properly.
3052 The register save area is laid out like so:
3054 [ varargs spill area ]
3055 [ fr register spill area ]
3056 [ br register spill area ]
3057 [ ar register spill area ]
3058 [ pr register spill area ]
3059 [ gr register spill area ] */
3061 /* ??? Get inefficient code when the frame size is larger than can fit in an
3062 adds instruction. */
3065 ia64_expand_prologue (void)
3067 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3071 ia64_compute_frame_size (get_frame_size ());
3072 last_scratch_gr_reg = 15;
3076 fprintf (dump_file, "ia64 frame related registers "
3077 "recorded in current_frame_info.r[]:\n");
3078 #define PRINTREG(a) if (current_frame_info.r[a]) \
3079 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3081 PRINTREG(reg_save_b0);
3082 PRINTREG(reg_save_pr);
3083 PRINTREG(reg_save_ar_pfs);
3084 PRINTREG(reg_save_ar_unat);
3085 PRINTREG(reg_save_ar_lc);
3086 PRINTREG(reg_save_gp);
3090 /* If there is no epilogue, then we don't need some prologue insns.
3091 We need to avoid emitting the dead prologue insns, because flow
3092 will complain about them. */
3098 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3099 if ((e->flags & EDGE_FAKE) == 0
3100 && (e->flags & EDGE_FALLTHRU) != 0)
3102 epilogue_p = (e != NULL);
3107 /* Set the local, input, and output register names. We need to do this
3108 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109 half. If we use in/loc/out register names, then we get assembler errors
3110 in crtn.S because there is no alloc insn or regstk directive in there. */
3111 if (! TARGET_REG_NAMES)
3113 int inputs = current_frame_info.n_input_regs;
3114 int locals = current_frame_info.n_local_regs;
3115 int outputs = current_frame_info.n_output_regs;
3117 for (i = 0; i < inputs; i++)
3118 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119 for (i = 0; i < locals; i++)
3120 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121 for (i = 0; i < outputs; i++)
3122 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3125 /* Set the frame pointer register name. The regnum is logically loc79,
3126 but of course we'll not have allocated that many locals. Rather than
3127 worrying about renumbering the existing rtxs, we adjust the name. */
3128 /* ??? This code means that we can never use one local register when
3129 there is a frame pointer. loc79 gets wasted in this case, as it is
3130 renamed to a register that will never be used. See also the try_locals
3131 code in find_gr_spill. */
3132 if (current_frame_info.r[reg_fp])
3134 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3135 reg_names[HARD_FRAME_POINTER_REGNUM]
3136 = reg_names[current_frame_info.r[reg_fp]];
3137 reg_names[current_frame_info.r[reg_fp]] = tmp;
3140 /* We don't need an alloc instruction if we've used no outputs or locals. */
3141 if (current_frame_info.n_local_regs == 0
3142 && current_frame_info.n_output_regs == 0
3143 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3144 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3146 /* If there is no alloc, but there are input registers used, then we
3147 need a .regstk directive. */
3148 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3149 ar_pfs_save_reg = NULL_RTX;
3153 current_frame_info.need_regstk = 0;
3155 if (current_frame_info.r[reg_save_ar_pfs])
3157 regno = current_frame_info.r[reg_save_ar_pfs];
3158 reg_emitted (reg_save_ar_pfs);
3161 regno = next_scratch_gr_reg ();
3162 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3164 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3165 GEN_INT (current_frame_info.n_input_regs),
3166 GEN_INT (current_frame_info.n_local_regs),
3167 GEN_INT (current_frame_info.n_output_regs),
3168 GEN_INT (current_frame_info.n_rotate_regs)));
3169 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
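      /* For illustration (hypothetical counts): a function with 2 inputs,
	 3 locals, 1 output and no rotating registers gets

	     alloc loc2 = ar.pfs, 2, 3, 1, 0

	 where loc2 stands for the GR picked above to hold the saved
	 ar.pfs.  */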
3172 /* Set up frame pointer, stack pointer, and spill iterators. */
3174 n_varargs = cfun->machine->n_varargs;
3175 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3176 stack_pointer_rtx, 0);
3178 if (frame_pointer_needed)
3180 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3181 RTX_FRAME_RELATED_P (insn) = 1;
3184 if (current_frame_info.total_size != 0)
3186 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3189 if (satisfies_constraint_I (frame_size_rtx))
3190 offset = frame_size_rtx;
3193 regno = next_scratch_gr_reg ();
3194 offset = gen_rtx_REG (DImode, regno);
3195 emit_move_insn (offset, frame_size_rtx);
3198 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3199 stack_pointer_rtx, offset));
3201 if (! frame_pointer_needed)
3203 RTX_FRAME_RELATED_P (insn) = 1;
3204 if (GET_CODE (offset) != CONST_INT)
3205 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3206 gen_rtx_SET (VOIDmode,
3208 gen_rtx_PLUS (DImode,
3213 /* ??? At this point we must generate a magic insn that appears to
3214 modify the stack pointer, the frame pointer, and all spill
3215 iterators. This would allow the most scheduling freedom. For
3216 now, just hard stop. */
3217 emit_insn (gen_blockage ());
3220 /* Must copy out ar.unat before doing any integer spills. */
3221 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3223 if (current_frame_info.r[reg_save_ar_unat])
3226 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3227 reg_emitted (reg_save_ar_unat);
3231 alt_regno = next_scratch_gr_reg ();
3232 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3233 current_frame_info.gr_used_mask |= 1 << alt_regno;
3236 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3237 insn = emit_move_insn (ar_unat_save_reg, reg);
3238 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3240 /* Even if we're not going to generate an epilogue, we still
3241 need to save the register so that EH works. */
3242 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3243 emit_insn (gen_prologue_use (ar_unat_save_reg));
3246 ar_unat_save_reg = NULL_RTX;
3248 /* Spill all varargs registers. Do this before spilling any GR registers,
3249 since we want the UNAT bits for the GR registers to override the UNAT
3250 bits from varargs, which we don't care about. */
3253 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3255 reg = gen_rtx_REG (DImode, regno);
3256 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3259 /* Locate the bottom of the register save area. */
3260 cfa_off = (current_frame_info.spill_cfa_off
3261 + current_frame_info.spill_size
3262 + current_frame_info.extra_spill_size);
3264 /* Save the predicate register block either in a register or in memory. */
3265 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3267 reg = gen_rtx_REG (DImode, PR_REG (0));
3268 if (current_frame_info.r[reg_save_pr] != 0)
3270 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3271 reg_emitted (reg_save_pr);
3272 insn = emit_move_insn (alt_reg, reg);
3274 /* ??? Denote pr spill/fill by a DImode move that modifies all
3275 64 hard registers. */
3276 RTX_FRAME_RELATED_P (insn) = 1;
3277 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3278 gen_rtx_SET (VOIDmode, alt_reg, reg));
3280 /* Even if we're not going to generate an epilogue, we still
3281 need to save the register so that EH works. */
3283 emit_insn (gen_prologue_use (alt_reg));
3287 alt_regno = next_scratch_gr_reg ();
3288 alt_reg = gen_rtx_REG (DImode, alt_regno);
3289 insn = emit_move_insn (alt_reg, reg);
3290 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3295 /* Handle AR regs in numerical order. All of them get special handling. */
3296 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3297 && current_frame_info.r[reg_save_ar_unat] == 0)
3299 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3300 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3304 /* The alloc insn already copied ar.pfs into a general register. The
3305 only thing we have to do now is copy that register to a stack slot
3306 if we'd not allocated a local register for the job. */
3307 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3308 && current_frame_info.r[reg_save_ar_pfs] == 0)
3310 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3311 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3315 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3317 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3318 if (current_frame_info.r[reg_save_ar_lc] != 0)
3320 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3321 reg_emitted (reg_save_ar_lc);
3322 insn = emit_move_insn (alt_reg, reg);
3323 RTX_FRAME_RELATED_P (insn) = 1;
3325 /* Even if we're not going to generate an epilogue, we still
3326 need to save the register so that EH works. */
3328 emit_insn (gen_prologue_use (alt_reg));
3332 alt_regno = next_scratch_gr_reg ();
3333 alt_reg = gen_rtx_REG (DImode, alt_regno);
3334 emit_move_insn (alt_reg, reg);
3335 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3340 /* Save the return pointer. */
3341 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3343 reg = gen_rtx_REG (DImode, BR_REG (0));
3344 if (current_frame_info.r[reg_save_b0] != 0)
3346 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3347 reg_emitted (reg_save_b0);
3348 insn = emit_move_insn (alt_reg, reg);
3349 RTX_FRAME_RELATED_P (insn) = 1;
3351 /* Even if we're not going to generate an epilogue, we still
3352 need to save the register so that EH works. */
3354 emit_insn (gen_prologue_use (alt_reg));
3358 alt_regno = next_scratch_gr_reg ();
3359 alt_reg = gen_rtx_REG (DImode, alt_regno);
3360 emit_move_insn (alt_reg, reg);
3361 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3366 if (current_frame_info.r[reg_save_gp])
3368 reg_emitted (reg_save_gp);
3369 insn = emit_move_insn (gen_rtx_REG (DImode,
3370 current_frame_info.r[reg_save_gp]),
3371 pic_offset_table_rtx);
3374 /* We should now be at the base of the gr/br/fr spill area. */
3375 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376 + current_frame_info.spill_size));
3378 /* Spill all general registers. */
3379 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3382 reg = gen_rtx_REG (DImode, regno);
3383 do_spill (gen_gr_spill, reg, cfa_off, reg);
3387 /* Spill the rest of the BR registers. */
3388 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3389 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3391 alt_regno = next_scratch_gr_reg ();
3392 alt_reg = gen_rtx_REG (DImode, alt_regno);
3393 reg = gen_rtx_REG (DImode, regno);
3394 emit_move_insn (alt_reg, reg);
3395 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3399 /* Align the frame and spill all FR registers. */
3400 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3401 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3403 gcc_assert (!(cfa_off & 15));
3404 reg = gen_rtx_REG (XFmode, regno);
3405 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3409 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3411 finish_spill_pointers ();
3414 /* Called after register allocation to add any instructions needed for the
3415 epilogue. Using an epilogue insn is favored compared to putting all of the
3416 instructions in output_function_epilogue(), since it allows the scheduler
3417 to intermix instructions with the restores of the caller saved registers. In
3418 some cases, it might be necessary to emit a barrier instruction as the last
3419 insn to prevent such scheduling. */
3422 ia64_expand_epilogue (int sibcall_p)
3424 rtx insn, reg, alt_reg, ar_unat_save_reg;
3425 int regno, alt_regno, cfa_off;
3427 ia64_compute_frame_size (get_frame_size ());
3429 /* If there is a frame pointer, then we use it instead of the stack
3430 pointer, so that the stack pointer does not need to be valid when
3431 the epilogue starts. See EXIT_IGNORE_STACK. */
3432 if (frame_pointer_needed)
3433 setup_spill_pointers (current_frame_info.n_spilled,
3434 hard_frame_pointer_rtx, 0);
3436 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3437 current_frame_info.total_size);
3439 if (current_frame_info.total_size != 0)
3441 /* ??? At this point we must generate a magic insn that appears to
3442 modify the spill iterators and the frame pointer. This would
3443 allow the most scheduling freedom. For now, just hard stop. */
3444 emit_insn (gen_blockage ());
3447 /* Locate the bottom of the register save area. */
3448 cfa_off = (current_frame_info.spill_cfa_off
3449 + current_frame_info.spill_size
3450 + current_frame_info.extra_spill_size);
3452 /* Restore the predicate registers. */
3453 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3455 if (current_frame_info.r[reg_save_pr] != 0)
3457 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3458 reg_emitted (reg_save_pr);
3462 alt_regno = next_scratch_gr_reg ();
3463 alt_reg = gen_rtx_REG (DImode, alt_regno);
3464 do_restore (gen_movdi_x, alt_reg, cfa_off);
3467 reg = gen_rtx_REG (DImode, PR_REG (0));
3468 emit_move_insn (reg, alt_reg);
3471 /* Restore the application registers. */
3473 /* Load the saved unat from the stack, but do not restore it until
3474 after the GRs have been restored. */
3475 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3477 if (current_frame_info.r[reg_save_ar_unat] != 0)
3480 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3481 reg_emitted (reg_save_ar_unat);
3485 alt_regno = next_scratch_gr_reg ();
3486 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3487 current_frame_info.gr_used_mask |= 1 << alt_regno;
3488 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);