1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "diagnostic-core.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
63 #include "dwarf2out.h"
66 /* This is used for communication between ASM_OUTPUT_LABEL and
67 ASM_OUTPUT_LABELREF. */
68 int ia64_asm_output_label = 0;
70 /* Register names for ia64_expand_prologue. */
71 static const char * const ia64_reg_numbers[96] =
72 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81 "r104","r105","r106","r107","r108","r109","r110","r111",
82 "r112","r113","r114","r115","r116","r117","r118","r119",
83 "r120","r121","r122","r123","r124","r125","r126","r127"};
85 /* ??? These strings could be shared with REGISTER_NAMES. */
86 static const char * const ia64_input_reg_names[8] =
87 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_local_reg_names[80] =
91 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
102 /* ??? These strings could be shared with REGISTER_NAMES. */
103 static const char * const ia64_output_reg_names[8] =
104 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
111 /* The following variable is used by the DFA insn scheduler. The value is
112 TRUE if we do insn bundling instead of insn scheduling. */
124 number_of_ia64_frame_regs
127 /* Structure to be filled in by ia64_compute_frame_size with register
128 save masks and offsets for the current function. */
struct ia64_frame_info
{
132 HOST_WIDE_INT total_size; /* size of the stack frame, not including
133 the caller's scratch area. */
134 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
135 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
136 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
137 HARD_REG_SET mask; /* mask of saved registers. */
138 unsigned int gr_used_mask; /* mask of registers in use as gr spill
139 registers or long-term scratches. */
140 int n_spilled; /* number of spilled registers. */
141 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
142 int n_input_regs; /* number of input registers used. */
143 int n_local_regs; /* number of local registers used. */
144 int n_output_regs; /* number of output registers used. */
145 int n_rotate_regs; /* number of rotating registers used. */
147 char need_regstk; /* true if a .regstk directive needed. */
  char initialized;		/* true if the data is finalized.  */
};
151 /* Current frame information calculated by ia64_compute_frame_size. */
152 static struct ia64_frame_info current_frame_info;
153 /* The actual registers that are emitted. */
154 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
156 static int ia64_first_cycle_multipass_dfa_lookahead (void);
157 static void ia64_dependencies_evaluation_hook (rtx, rtx);
158 static void ia64_init_dfa_pre_cycle_insn (void);
159 static rtx ia64_dfa_pre_cycle_insn (void);
160 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
162 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
163 static void ia64_h_i_d_extended (void);
164 static void * ia64_alloc_sched_context (void);
165 static void ia64_init_sched_context (void *, bool);
166 static void ia64_set_sched_context (void *);
167 static void ia64_clear_sched_context (void *);
168 static void ia64_free_sched_context (void *);
169 static int ia64_mode_to_int (enum machine_mode);
170 static void ia64_set_sched_flags (spec_info_t);
171 static ds_t ia64_get_insn_spec_ds (rtx);
172 static ds_t ia64_get_insn_checked_ds (rtx);
173 static bool ia64_skip_rtx_p (const_rtx);
174 static int ia64_speculate_insn (rtx, ds_t, rtx *);
175 static bool ia64_needs_block_p (int);
176 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
177 static int ia64_spec_check_p (rtx);
178 static int ia64_spec_check_src_p (rtx);
179 static rtx gen_tls_get_addr (void);
180 static rtx gen_thread_pointer (void);
181 static int find_gr_spill (enum ia64_frame_regs, int);
182 static int next_scratch_gr_reg (void);
183 static void mark_reg_gr_used_mask (rtx, void *);
184 static void ia64_compute_frame_size (HOST_WIDE_INT);
185 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
186 static void finish_spill_pointers (void);
187 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
188 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
189 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
190 static rtx gen_movdi_x (rtx, rtx, rtx);
191 static rtx gen_fr_spill_x (rtx, rtx, rtx);
192 static rtx gen_fr_restore_x (rtx, rtx, rtx);
194 static void ia64_option_override (void);
195 static bool ia64_can_eliminate (const int, const int);
196 static enum machine_mode hfa_element_mode (const_tree, bool);
197 static void ia64_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
199 static int ia64_arg_partial_bytes (cumulative_args_t, enum machine_mode,
201 static rtx ia64_function_arg_1 (cumulative_args_t, enum machine_mode,
202 const_tree, bool, bool);
203 static rtx ia64_function_arg (cumulative_args_t, enum machine_mode,
205 static rtx ia64_function_incoming_arg (cumulative_args_t,
206 enum machine_mode, const_tree, bool);
207 static void ia64_function_arg_advance (cumulative_args_t, enum machine_mode,
209 static unsigned int ia64_function_arg_boundary (enum machine_mode,
211 static bool ia64_function_ok_for_sibcall (tree, tree);
212 static bool ia64_return_in_memory (const_tree, const_tree);
213 static rtx ia64_function_value (const_tree, const_tree, bool);
214 static rtx ia64_libcall_value (enum machine_mode, const_rtx);
215 static bool ia64_function_value_regno_p (const unsigned int);
216 static int ia64_register_move_cost (enum machine_mode, reg_class_t,
218 static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
220 static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
221 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
222 static void fix_range (const char *);
223 static struct machine_function * ia64_init_machine_status (void);
224 static void emit_insn_group_barriers (FILE *);
225 static void emit_all_insn_group_barriers (FILE *);
226 static void final_emit_insn_group_barriers (FILE *);
227 static void emit_predicate_relation_info (void);
228 static void ia64_reorg (void);
229 static bool ia64_in_small_data_p (const_tree);
230 static void process_epilogue (FILE *, rtx, bool, bool);
232 static bool ia64_assemble_integer (rtx, unsigned int, int);
233 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
234 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
235 static void ia64_output_function_end_prologue (FILE *);
237 static void ia64_print_operand (FILE *, rtx, int);
238 static void ia64_print_operand_address (FILE *, rtx);
239 static bool ia64_print_operand_punct_valid_p (unsigned char code);
241 static int ia64_issue_rate (void);
242 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
243 static void ia64_sched_init (FILE *, int, int);
244 static void ia64_sched_init_global (FILE *, int, int);
245 static void ia64_sched_finish_global (FILE *, int);
246 static void ia64_sched_finish (FILE *, int);
247 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
248 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
249 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
250 static int ia64_variable_issue (FILE *, int, rtx, int);
252 static void ia64_asm_unwind_emit (FILE *, rtx);
253 static void ia64_asm_emit_except_personality (rtx);
254 static void ia64_asm_init_sections (void);
256 static enum unwind_info_type ia64_debug_unwind_info (void);
258 static struct bundle_state *get_free_bundle_state (void);
259 static void free_bundle_state (struct bundle_state *);
260 static void initiate_bundle_states (void);
261 static void finish_bundle_states (void);
262 static unsigned bundle_state_hash (const void *);
263 static int bundle_state_eq_p (const void *, const void *);
264 static int insert_bundle_state (struct bundle_state *);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state *, int);
268 static int try_issue_insn (struct bundle_state *, rtx);
269 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
270 static int get_max_pos (state_t);
271 static int get_template (state_t, int);
273 static rtx get_next_important_insn (rtx, rtx);
274 static bool important_for_bundling_p (rtx);
275 static void bundling (FILE *, int, rtx, rtx);
277 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
278 HOST_WIDE_INT, tree);
279 static void ia64_file_start (void);
280 static void ia64_globalize_decl_name (FILE *, tree);
282 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
283 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284 static section *ia64_select_rtx_section (enum machine_mode, rtx,
285 unsigned HOST_WIDE_INT);
286 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
288 static unsigned int ia64_section_type_flags (tree, const char *, int);
289 static void ia64_init_libfuncs (void)
291 static void ia64_hpux_init_libfuncs (void)
293 static void ia64_sysv4_init_libfuncs (void)
295 static void ia64_vms_init_libfuncs (void)
297 static void ia64_soft_fp_init_libfuncs (void)
299 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
301 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
304 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
305 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
306 static void ia64_encode_section_info (tree, rtx, int);
307 static rtx ia64_struct_value_rtx (tree, int);
308 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
309 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
310 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
311 static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
312 static bool ia64_legitimate_address_p (enum machine_mode, rtx, bool);
313 static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
314 static const char *ia64_mangle_type (const_tree);
315 static const char *ia64_invalid_conversion (const_tree, const_tree);
316 static const char *ia64_invalid_unary_op (int, const_tree);
317 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
318 static enum machine_mode ia64_c_mode_for_suffix (char);
319 static void ia64_trampoline_init (rtx, tree, rtx);
320 static void ia64_override_options_after_change (void);
322 static tree ia64_builtin_decl (unsigned, bool);
324 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
325 static enum machine_mode ia64_get_reg_raw_mode (int regno);
326 static section * ia64_hpux_function_section (tree, enum node_frequency,
329 static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
330 const unsigned char *sel);
332 #define MAX_VECT_LEN 8
334 struct expand_vec_perm_d
336 rtx target, op0, op1;
337 unsigned char perm[MAX_VECT_LEN];
338 enum machine_mode vmode;
344 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
347 /* Table of valid machine attributes. */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object", 1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id", 1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};
364 /* Initialize the GCC target structure. */
365 #undef TARGET_ATTRIBUTE_TABLE
366 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
368 #undef TARGET_INIT_BUILTINS
369 #define TARGET_INIT_BUILTINS ia64_init_builtins
371 #undef TARGET_EXPAND_BUILTIN
372 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
374 #undef TARGET_BUILTIN_DECL
375 #define TARGET_BUILTIN_DECL ia64_builtin_decl
377 #undef TARGET_ASM_BYTE_OP
378 #define TARGET_ASM_BYTE_OP "\tdata1\t"
379 #undef TARGET_ASM_ALIGNED_HI_OP
380 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
383 #undef TARGET_ASM_ALIGNED_DI_OP
384 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
385 #undef TARGET_ASM_UNALIGNED_HI_OP
386 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
387 #undef TARGET_ASM_UNALIGNED_SI_OP
388 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
389 #undef TARGET_ASM_UNALIGNED_DI_OP
390 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
391 #undef TARGET_ASM_INTEGER
392 #define TARGET_ASM_INTEGER ia64_assemble_integer
394 #undef TARGET_OPTION_OVERRIDE
395 #define TARGET_OPTION_OVERRIDE ia64_option_override
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
400 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
401 #undef TARGET_ASM_FUNCTION_EPILOGUE
402 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
404 #undef TARGET_PRINT_OPERAND
405 #define TARGET_PRINT_OPERAND ia64_print_operand
406 #undef TARGET_PRINT_OPERAND_ADDRESS
407 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
408 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
409 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
411 #undef TARGET_IN_SMALL_DATA_P
412 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
414 #undef TARGET_SCHED_ADJUST_COST_2
415 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
416 #undef TARGET_SCHED_ISSUE_RATE
417 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
418 #undef TARGET_SCHED_VARIABLE_ISSUE
419 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
420 #undef TARGET_SCHED_INIT
421 #define TARGET_SCHED_INIT ia64_sched_init
422 #undef TARGET_SCHED_FINISH
423 #define TARGET_SCHED_FINISH ia64_sched_finish
424 #undef TARGET_SCHED_INIT_GLOBAL
425 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
426 #undef TARGET_SCHED_FINISH_GLOBAL
427 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
428 #undef TARGET_SCHED_REORDER
429 #define TARGET_SCHED_REORDER ia64_sched_reorder
430 #undef TARGET_SCHED_REORDER2
431 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
433 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
434 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
436 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
437 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
439 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
440 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
441 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
442 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
446 ia64_first_cycle_multipass_dfa_lookahead_guard
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
451 #undef TARGET_SCHED_H_I_D_EXTENDED
452 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
454 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
455 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
457 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
458 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
460 #undef TARGET_SCHED_SET_SCHED_CONTEXT
461 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
463 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
464 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
466 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
467 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
469 #undef TARGET_SCHED_SET_SCHED_FLAGS
470 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
472 #undef TARGET_SCHED_GET_INSN_SPEC_DS
473 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
475 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
476 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
478 #undef TARGET_SCHED_SPECULATE_INSN
479 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
481 #undef TARGET_SCHED_NEEDS_BLOCK_P
482 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
484 #undef TARGET_SCHED_GEN_SPEC_CHECK
485 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
487 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
488 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
489 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
491 #undef TARGET_SCHED_SKIP_RTX_P
492 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
495 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
496 #undef TARGET_ARG_PARTIAL_BYTES
497 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
498 #undef TARGET_FUNCTION_ARG
499 #define TARGET_FUNCTION_ARG ia64_function_arg
500 #undef TARGET_FUNCTION_INCOMING_ARG
501 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
502 #undef TARGET_FUNCTION_ARG_ADVANCE
503 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
504 #undef TARGET_FUNCTION_ARG_BOUNDARY
505 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
512 #undef TARGET_ASM_FILE_START
513 #define TARGET_ASM_FILE_START ia64_file_start
515 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
516 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
518 #undef TARGET_REGISTER_MOVE_COST
519 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
520 #undef TARGET_MEMORY_MOVE_COST
521 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
522 #undef TARGET_RTX_COSTS
523 #define TARGET_RTX_COSTS ia64_rtx_costs
524 #undef TARGET_ADDRESS_COST
525 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
527 #undef TARGET_UNSPEC_MAY_TRAP_P
528 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
530 #undef TARGET_MACHINE_DEPENDENT_REORG
531 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
533 #undef TARGET_ENCODE_SECTION_INFO
534 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
536 #undef TARGET_SECTION_TYPE_FLAGS
537 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
540 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
541 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
544 /* ??? Investigate. */
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
550 #undef TARGET_FUNCTION_VALUE
551 #define TARGET_FUNCTION_VALUE ia64_function_value
552 #undef TARGET_LIBCALL_VALUE
553 #define TARGET_LIBCALL_VALUE ia64_libcall_value
554 #undef TARGET_FUNCTION_VALUE_REGNO_P
555 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
557 #undef TARGET_STRUCT_VALUE_RTX
558 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
559 #undef TARGET_RETURN_IN_MEMORY
560 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
563 #undef TARGET_STRICT_ARGUMENT_NAMING
564 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
565 #undef TARGET_MUST_PASS_IN_STACK
566 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
567 #undef TARGET_GET_RAW_RESULT_MODE
568 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
569 #undef TARGET_GET_RAW_ARG_MODE
570 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
572 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
573 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
575 #undef TARGET_ASM_UNWIND_EMIT
576 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
577 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
578 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
579 #undef TARGET_ASM_INIT_SECTIONS
580 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
582 #undef TARGET_DEBUG_UNWIND_INFO
583 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
585 #undef TARGET_SCALAR_MODE_SUPPORTED_P
586 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
587 #undef TARGET_VECTOR_MODE_SUPPORTED_P
588 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
590 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
591 in an order different from the specified program order. */
592 #undef TARGET_RELAXED_ORDERING
593 #define TARGET_RELAXED_ORDERING true
595 #undef TARGET_LEGITIMATE_CONSTANT_P
596 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
597 #undef TARGET_LEGITIMATE_ADDRESS_P
598 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
600 #undef TARGET_CANNOT_FORCE_CONST_MEM
601 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE ia64_mangle_type
606 #undef TARGET_INVALID_CONVERSION
607 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
608 #undef TARGET_INVALID_UNARY_OP
609 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
610 #undef TARGET_INVALID_BINARY_OP
611 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
613 #undef TARGET_C_MODE_FOR_SUFFIX
614 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
616 #undef TARGET_CAN_ELIMINATE
617 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
619 #undef TARGET_TRAMPOLINE_INIT
620 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
622 #undef TARGET_INVALID_WITHIN_DOLOOP
623 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
625 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
626 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
631 #undef TARGET_DELAY_SCHED2
632 #define TARGET_DELAY_SCHED2 true
634 /* Variable tracking should be run after all optimizations which
635 change order of insns. It also needs a valid CFG. */
636 #undef TARGET_DELAY_VARTRACK
637 #define TARGET_DELAY_VARTRACK true
639 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
640 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
642 struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
651 static GTY(()) tree small_ident1;
652 static GTY(()) tree small_ident2;
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
664 /* Retrieve the address area that has been chosen for the given decl. */
666 static ia64_addr_area
667 ia64_get_addr_area (tree decl)
671 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
677 id = TREE_VALUE (TREE_VALUE (model_attr));
678 if (id == small_ident1 || id == small_ident2)
679 return ADDR_AREA_SMALL;
681 return ADDR_AREA_NORMAL;
685 ia64_handle_model_attribute (tree *node, tree name, tree args,
686 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
688 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
690 tree arg, decl = *node;
693 arg = TREE_VALUE (args);
694 if (arg == small_ident1 || arg == small_ident2)
696 addr_area = ADDR_AREA_SMALL;
700 warning (OPT_Wattributes, "invalid argument of %qE attribute",
702 *no_add_attrs = true;
705 switch (TREE_CODE (decl))
708 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
710 && !TREE_STATIC (decl))
712 error_at (DECL_SOURCE_LOCATION (decl),
713 "an address area attribute cannot be specified for "
715 *no_add_attrs = true;
717 area = ia64_get_addr_area (decl);
718 if (area != ADDR_AREA_NORMAL && addr_area != area)
720 error ("address area of %q+D conflicts with previous "
721 "declaration", decl);
722 *no_add_attrs = true;
727 error_at (DECL_SOURCE_LOCATION (decl),
728 "address area attribute cannot be specified for "
730 *no_add_attrs = true;
734 warning (OPT_Wattributes, "%qE attribute ignored",
736 *no_add_attrs = true;
743 /* The section must have global and overlaid attributes. */
744 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
746 /* Part of the low level implementation of DEC Ada pragma Common_Object which
747 enables the shared use of variables stored in overlaid linker areas
748 corresponding to the use of Fortran COMMON. */
751 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
752 int flags ATTRIBUTE_UNUSED,
760 DECL_COMMON (decl) = 1;
761 id = TREE_VALUE (args);
762 if (TREE_CODE (id) == IDENTIFIER_NODE)
763 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
764 else if (TREE_CODE (id) == STRING_CST)
768 warning (OPT_Wattributes,
769 "%qE attribute requires a string constant argument", name);
770 *no_add_attrs = true;
773 DECL_SECTION_NAME (decl) = val;
777 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
780 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
781 unsigned HOST_WIDE_INT size,
784 tree attr = DECL_ATTRIBUTES (decl);
  /* The common_object attribute sets DECL_SECTION_NAME, so check that
     it is set before looking up the attribute.  */
788 if (DECL_SECTION_NAME (decl) && attr)
789 attr = lookup_attribute ("common_object", attr);
795 /* Code from elfos.h. */
796 fprintf (file, "%s", COMMON_ASM_OP);
797 assemble_name (file, name);
798 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
799 size, align / BITS_PER_UNIT);
803 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
804 ASM_OUTPUT_LABEL (file, name);
805 ASM_OUTPUT_SKIP (file, size ? size : 1);
809 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
812 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
815 if (!(flags & SECTION_VMS_OVERLAY))
817 default_elf_asm_named_section (name, flags, decl);
820 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
823 if (flags & SECTION_DECLARED)
825 fprintf (asm_out_file, "\t.section\t%s\n", name);
829 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
833 ia64_encode_addr_area (tree decl, rtx symbol)
837 flags = SYMBOL_REF_FLAGS (symbol);
838 switch (ia64_get_addr_area (decl))
840 case ADDR_AREA_NORMAL: break;
841 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
842 default: gcc_unreachable ();
844 SYMBOL_REF_FLAGS (symbol) = flags;
848 ia64_encode_section_info (tree decl, rtx rtl, int first)
850 default_encode_section_info (decl, rtl, first);
852 /* Careful not to prod global register variables. */
853 if (TREE_CODE (decl) == VAR_DECL
854 && GET_CODE (DECL_RTL (decl)) == MEM
855 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
856 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
857 ia64_encode_addr_area (decl, XEXP (rtl, 0));
860 /* Return 1 if the operands of a move are ok. */
863 ia64_move_ok (rtx dst, rtx src)
865 /* If we're under init_recog_no_volatile, we'll not be able to use
866 memory_operand. So check the code directly and don't worry about
867 the validity of the underlying address, which should have been
868 checked elsewhere anyway. */
869 if (GET_CODE (dst) != MEM)
871 if (GET_CODE (src) == MEM)
873 if (register_operand (src, VOIDmode))
876 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
877 if (INTEGRAL_MODE_P (GET_MODE (dst)))
878 return src == const0_rtx;
880 return satisfies_constraint_G (src);
883 /* Return 1 if the operands are ok for a floating point load pair. */
886 ia64_load_pair_ok (rtx dst, rtx src)
888 /* ??? There is a thinko in the implementation of the "x" constraint and the
889 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
890 also return false for it. */
891 if (GET_CODE (dst) != REG
892 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
894 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
896 switch (GET_CODE (XEXP (src, 0)))
905 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
907 if (GET_CODE (adjust) != CONST_INT
908 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
919 addp4_optimize_ok (rtx op1, rtx op2)
921 return (basereg_operand (op1, GET_MODE(op1)) !=
922 basereg_operand (op2, GET_MODE(op2)));
925 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
926 Return the length of the field, or <= 0 on failure. */
929 ia64_depz_field_mask (rtx rop, rtx rshift)
931 unsigned HOST_WIDE_INT op = INTVAL (rop);
932 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
938 return exact_log2 (op + 1);
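/* Worked example for the check above, assuming the typical use on a
   pattern of the form (and (ashift X N) MASK): with rop = 0xff0 and
   rshift = 4, op >> shift = 0xff and exact_log2 (0xff + 1) = 8, so the
   deposit field is 8 bits wide.  A non-contiguous mask such as 0xf0f
   gives 0xf0 after the shift, and exact_log2 (0xf1) is -1, i.e. the
   "<= 0 on failure" case mentioned above.  */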
941 /* Return the TLS model to use for ADDR. */
943 static enum tls_model
944 tls_symbolic_operand_type (rtx addr)
946 enum tls_model tls_kind = TLS_MODEL_NONE;
948 if (GET_CODE (addr) == CONST)
950 if (GET_CODE (XEXP (addr, 0)) == PLUS
951 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
952 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
954 else if (GET_CODE (addr) == SYMBOL_REF)
955 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
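/* For illustration, the two shapes handled above are a bare symbol,
     (symbol_ref:DI ("x")),
   and a symbol plus offset wrapped in a CONST,
     (const:DI (plus:DI (symbol_ref:DI ("x")) (const_int 8))).
   In both cases the TLS model recorded on the SYMBOL_REF itself is
   returned; any other form yields TLS_MODEL_NONE.  */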
960 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
961 as a base register. */
964 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
967 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
970 && (GENERAL_REGNO_P (REGNO (reg))
971 || !HARD_REGISTER_P (reg)))
978 ia64_legitimate_address_reg (const_rtx reg, bool strict)
980 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
981 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
982 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
989 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
991 if (GET_CODE (disp) == PLUS
992 && rtx_equal_p (reg, XEXP (disp, 0))
993 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
994 || (CONST_INT_P (XEXP (disp, 1))
995 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1001 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1004 ia64_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
1007 if (ia64_legitimate_address_reg (x, strict))
1009 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1010 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1011 && XEXP (x, 0) != arg_pointer_rtx)
1013 else if (GET_CODE (x) == POST_MODIFY
1014 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1015 && XEXP (x, 0) != arg_pointer_rtx
1016 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
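/* For illustration, the accepted address forms are:
     (reg r14)
     (post_inc:DI (reg r14))
     (post_dec:DI (reg r14))
     (post_modify:DI (reg r14) (plus:DI (reg r14) (const_int -16)))
     (post_modify:DI (reg r14) (plus:DI (reg r14) (reg r15)))
   with the post_modify constant limited to -256 .. 255.  There is no
   base+displacement or base+index form; such addresses must be formed
   with a separate add before the memory access.  (Register numbers are
   illustrative only.)  */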
1022 /* Return true if X is a constant that is valid for some immediate
1023 field in an instruction. */
1026 ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
1028 switch (GET_CODE (x))
1035 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1037 return satisfies_constraint_G (x);
1041 /* ??? Short term workaround for PR 28490. We must make the code here
1042 match the code in ia64_expand_move and move_operand, even though they
1043 are both technically wrong. */
1044 if (tls_symbolic_operand_type (x) == 0)
1046 HOST_WIDE_INT addend = 0;
1049 if (GET_CODE (op) == CONST
1050 && GET_CODE (XEXP (op, 0)) == PLUS
1051 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1053 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1054 op = XEXP (XEXP (op, 0), 0);
1057 if (any_offset_symbol_operand (op, mode)
1058 || function_operand (op, mode))
1060 if (aligned_offset_symbol_operand (op, mode))
1061 return (addend & 0x3fff) == 0;
1067 if (mode == V2SFmode)
1068 return satisfies_constraint_Y (x);
1070 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1071 && GET_MODE_SIZE (mode) <= 8);
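/* For illustration of the CONST case above: "sym" and "sym + 0x4000"
   (an addend whose low 14 bits are zero) are accepted when sym is an
   aligned_offset_symbol_operand, while "sym + 0x123" is rejected here
   and is instead materialized by ia64_expand_move, which splits the
   addend into a sign-extended 14-bit low part and a high part.  */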
1078 /* Don't allow TLS addresses to get spilled to memory. */
1081 ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
1085 return tls_symbolic_operand_type (x) != 0;
1088 /* Expand a symbolic constant load. */
1091 ia64_expand_load_address (rtx dest, rtx src)
1093 gcc_assert (GET_CODE (dest) == REG);
1095 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1096 having to pointer-extend the value afterward. Other forms of address
1097 computation below are also more natural to compute as 64-bit quantities.
1098 If we've been given an SImode destination register, change it. */
1099 if (GET_MODE (dest) != Pmode)
1100 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1101 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1105 if (small_addr_symbolic_operand (src, VOIDmode))
1108 if (TARGET_AUTO_PIC)
1109 emit_insn (gen_load_gprel64 (dest, src));
1110 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1111 emit_insn (gen_load_fptr (dest, src));
1112 else if (sdata_symbolic_operand (src, VOIDmode))
1113 emit_insn (gen_load_gprel (dest, src));
1116 HOST_WIDE_INT addend = 0;
1119 /* We did split constant offsets in ia64_expand_move, and we did try
1120 to keep them split in move_operand, but we also allowed reload to
1121 rematerialize arbitrary constants rather than spill the value to
1122 the stack and reload it. So we have to be prepared here to split
1123 them apart again. */
1124 if (GET_CODE (src) == CONST)
1126 HOST_WIDE_INT hi, lo;
1128 hi = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
      hi = hi - lo;

      if (lo != 0)
	{
	  addend = lo;
	  src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	}
1139 tmp = gen_rtx_HIGH (Pmode, src);
1140 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1141 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1143 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1144 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1148 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1149 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
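/* Worked example of the 14-bit addend split above: for an offset of
   0x12345,
     lo = ((0x12345 & 0x3fff) ^ 0x2000) - 0x2000
	= (0x2345 ^ 0x2000) - 0x2000 = 0x345 - 0x2000 = -0x1cbb
     hi = 0x12345 - lo = 0x14000
   so hi (a multiple of 0x4000) stays folded into the symbolic address
   loaded above, and lo, which fits the signed 14-bit range
   -0x2000 .. 0x1fff, is added on afterwards as a plain immediate.  */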
1156 static GTY(()) rtx gen_tls_tga;
1158 gen_tls_get_addr (void)
1161 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1165 static GTY(()) rtx thread_pointer_rtx;
1167 gen_thread_pointer (void)
1169 if (!thread_pointer_rtx)
1170 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1171 return thread_pointer_rtx;
1175 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1176 rtx orig_op1, HOST_WIDE_INT addend)
1178 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1180 HOST_WIDE_INT addend_lo, addend_hi;
1184 case TLS_MODEL_GLOBAL_DYNAMIC:
1187 tga_op1 = gen_reg_rtx (Pmode);
1188 emit_insn (gen_load_dtpmod (tga_op1, op1));
1190 tga_op2 = gen_reg_rtx (Pmode);
1191 emit_insn (gen_load_dtprel (tga_op2, op1));
1193 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1194 LCT_CONST, Pmode, 2, tga_op1,
1195 Pmode, tga_op2, Pmode);
1197 insns = get_insns ();
1200 if (GET_MODE (op0) != Pmode)
1202 emit_libcall_block (insns, op0, tga_ret, op1);
1205 case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
1207 If the call to __tls_get_addr is used only by a single symbol,
1208 then we should (somehow) move the dtprel to the second arg
1209 to avoid the extra add. */
1212 tga_op1 = gen_reg_rtx (Pmode);
1213 emit_insn (gen_load_dtpmod (tga_op1, op1));
1215 tga_op2 = const0_rtx;
1217 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1218 LCT_CONST, Pmode, 2, tga_op1,
1219 Pmode, tga_op2, Pmode);
1221 insns = get_insns ();
1224 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1226 tmp = gen_reg_rtx (Pmode);
1227 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1229 if (!register_operand (op0, Pmode))
1230 op0 = gen_reg_rtx (Pmode);
1233 emit_insn (gen_load_dtprel (op0, op1));
1234 emit_insn (gen_adddi3 (op0, tmp, op0));
1237 emit_insn (gen_add_dtprel (op0, op1, tmp));
1240 case TLS_MODEL_INITIAL_EXEC:
1241 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1242 addend_hi = addend - addend_lo;
1244 op1 = plus_constant (op1, addend_hi);
1247 tmp = gen_reg_rtx (Pmode);
1248 emit_insn (gen_load_tprel (tmp, op1));
1250 if (!register_operand (op0, Pmode))
1251 op0 = gen_reg_rtx (Pmode);
1252 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1255 case TLS_MODEL_LOCAL_EXEC:
1256 if (!register_operand (op0, Pmode))
1257 op0 = gen_reg_rtx (Pmode);
1263 emit_insn (gen_load_tprel (op0, op1));
1264 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1267 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1275 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1276 orig_op0, 1, OPTAB_DIRECT);
1277 if (orig_op0 == op0)
1279 if (GET_MODE (orig_op0) == Pmode)
1281 return gen_lowpart (GET_MODE (orig_op0), op0);
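/* For illustration of the models above: local-exec simply adds the
   symbol's @tprel offset to the thread pointer (r13, see
   gen_thread_pointer); initial-exec first loads that offset from the
   GOT; and the two dynamic models call __tls_get_addr with the
   dtpmod/dtprel pair loaded above, local-dynamic sharing one call per
   module and adding the per-symbol dtprel afterwards.  */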
1285 ia64_expand_move (rtx op0, rtx op1)
1287 enum machine_mode mode = GET_MODE (op0);
1289 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1290 op1 = force_reg (mode, op1);
1292 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1294 HOST_WIDE_INT addend = 0;
1295 enum tls_model tls_kind;
1298 if (GET_CODE (op1) == CONST
1299 && GET_CODE (XEXP (op1, 0)) == PLUS
1300 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1302 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1303 sym = XEXP (XEXP (op1, 0), 0);
1306 tls_kind = tls_symbolic_operand_type (sym);
1308 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1310 if (any_offset_symbol_operand (sym, mode))
1312 else if (aligned_offset_symbol_operand (sym, mode))
1314 HOST_WIDE_INT addend_lo, addend_hi;
1316 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1317 addend_hi = addend - addend_lo;
1321 op1 = plus_constant (sym, addend_hi);
1330 if (reload_completed)
1332 /* We really should have taken care of this offset earlier. */
1333 gcc_assert (addend == 0);
1334 if (ia64_expand_load_address (op0, op1))
1340 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1342 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1344 op1 = expand_simple_binop (mode, PLUS, subtarget,
1345 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1354 /* Split a move from OP1 to OP0 conditional on COND. */
1357 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1359 rtx insn, first = get_last_insn ();
1361 emit_move_insn (op0, op1);
  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
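/* For illustration: after the loop above, a move that was emitted as
     (set (reg:DI r14) (reg:DI r15))
   has its pattern rewritten to
     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg:DI r14) (reg:DI r15)))
   i.e. every real insn emitted for the move becomes predicated on
   COND.  (Register numbers are illustrative only.)  */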
1369 /* Split a post-reload TImode or TFmode reference into two DImode
1370 components. This is made extra difficult by the fact that we do
1371 not get any scratch registers to work with, because reload cannot
1372 be prevented from giving us a scratch that overlaps the register
1373 pair involved. So instead, when addressing memory, we tweak the
1374 pointer register up and back down with POST_INCs. Or up and not
1375 back down when we can get away with it.
1377 REVERSED is true when the loads must be done in reversed order
1378 (high word first) for correctness. DEAD is true when the pointer
1379 dies with the second insn we generate and therefore the second
1380 address must not carry a postmodify.
1382 May return an insn which is to be emitted after the moves. */
1385 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1389 switch (GET_CODE (in))
1392 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1393 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1398 /* Cannot occur reversed. */
1399 gcc_assert (!reversed);
1401 if (GET_MODE (in) != TFmode)
1402 split_double (in, &out[0], &out[1]);
1404 /* split_double does not understand how to split a TFmode
1405 quantity into a pair of DImode constants. */
1408 unsigned HOST_WIDE_INT p[2];
1409 long l[4]; /* TFmode is 128 bits */
1411 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1412 real_to_target (l, &r, TFmode);
1414 if (FLOAT_WORDS_BIG_ENDIAN)
1416 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1417 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1421 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1422 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1424 out[0] = GEN_INT (p[0]);
1425 out[1] = GEN_INT (p[1]);
1431 rtx base = XEXP (in, 0);
1434 switch (GET_CODE (base))
1439 out[0] = adjust_automodify_address
1440 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1441 out[1] = adjust_automodify_address
1442 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1446 /* Reversal requires a pre-increment, which can only
1447 be done as a separate insn. */
1448 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1449 out[0] = adjust_automodify_address
1450 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1451 out[1] = adjust_address (in, DImode, 0);
1456 gcc_assert (!reversed && !dead);
1458 /* Just do the increment in two steps. */
1459 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1460 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1464 gcc_assert (!reversed && !dead);
1466 /* Add 8, subtract 24. */
1467 base = XEXP (base, 0);
1468 out[0] = adjust_automodify_address
1469 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1470 out[1] = adjust_automodify_address
1472 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1477 gcc_assert (!reversed && !dead);
1479 /* Extract and adjust the modification. This case is
1480 trickier than the others, because we might have an
1481 index register, or we might have a combined offset that
1482 doesn't fit a signed 9-bit displacement field. We can
1483 assume the incoming expression is already legitimate. */
1484 offset = XEXP (base, 1);
1485 base = XEXP (base, 0);
1487 out[0] = adjust_automodify_address
1488 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1490 if (GET_CODE (XEXP (offset, 1)) == REG)
1492 /* Can't adjust the postmodify to match. Emit the
1493 original, then a separate addition insn. */
1494 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1495 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1499 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1500 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1502 /* Again the postmodify cannot be made to match,
1503 but in this case it's more efficient to get rid
1504 of the postmodify entirely and fix up with an
1506 out[1] = adjust_automodify_address (in, DImode, base, 8);
1508 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1512 /* Combined offset still fits in the displacement field.
1513 (We cannot overflow it at the high end.) */
1514 out[1] = adjust_automodify_address
1515 (in, DImode, gen_rtx_POST_MODIFY
1516 (Pmode, base, gen_rtx_PLUS
1518 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
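/* Worked example for the plain (reg) case above: a TImode load from
   (mem:TI (reg r14)) with the pointer still live afterwards becomes
     out[0] = (mem:DI (post_inc:DI (reg r14)))    -- r14 += 8
     out[1] = (mem:DI (post_dec:DI (reg r14)))    -- r14 -= 8
   so the pointer ends up back where it started; when DEAD is true the
   post_dec is simply omitted ("up and not back down" above).  */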
1537 /* Split a TImode or TFmode move instruction after reload.
1538 This is used by *movtf_internal and *movti_internal. */
1540 ia64_split_tmode_move (rtx operands[])
1542 rtx in[2], out[2], insn;
1545 bool reversed = false;
1547 /* It is possible for reload to decide to overwrite a pointer with
1548 the value it points to. In that case we have to do the loads in
1549 the appropriate order so that the pointer is not destroyed too
1550 early. Also we must not generate a postmodify for that second
1551 load, or rws_access_regno will die. */
1552 if (GET_CODE (operands[1]) == MEM
1553 && reg_overlap_mentioned_p (operands[0], operands[1]))
1555 rtx base = XEXP (operands[1], 0);
1556 while (GET_CODE (base) != REG)
1557 base = XEXP (base, 0);
1559 if (REGNO (base) == REGNO (operands[0]))
1563 /* Another reason to do the moves in reversed order is if the first
1564 element of the target register pair is also the second element of
1565 the source register pair. */
1566 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1567 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1570 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1571 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1573 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1574 if (GET_CODE (EXP) == MEM \
1575 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1576 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1577 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1578 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1580 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1581 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1582 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1584 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1585 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1586 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1589 emit_insn (fixup[0]);
1591 emit_insn (fixup[1]);
1593 #undef MAYBE_ADD_REG_INC_NOTE
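/* For illustration of the macro just #undef'd: when one half of the
   move used an address such as (post_inc:DI (reg r14)), the emitted
   insn gets a REG_INC note naming r14, which later passes use to see
   the implicit update of the pointer register.  */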
1596 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1597 through memory plus an extra GR scratch register. Except that you can
1598 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1599 SECONDARY_RELOAD_CLASS, but not both.
1601 We got into problems in the first place by allowing a construct like
1602 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1603 This solution attempts to prevent this situation from occurring. When
1604 we see something like the above, we spill the inner register to memory. */
1607 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1609 if (GET_CODE (in) == SUBREG
1610 && GET_MODE (SUBREG_REG (in)) == TImode
1611 && GET_CODE (SUBREG_REG (in)) == REG)
1613 rtx memt = assign_stack_temp (TImode, 16, 0);
1614 emit_move_insn (memt, SUBREG_REG (in));
1615 return adjust_address (memt, mode, 0);
1617 else if (force && GET_CODE (in) == REG)
1619 rtx memx = assign_stack_temp (mode, 16, 0);
1620 emit_move_insn (memx, in);
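/* For illustration of the spill above: given (subreg:XF (reg:TI r100) 0)
   a 16-byte stack temporary is allocated, the TImode register is stored
   into it, and the XFmode use is then performed from that memory, so
   reload never has to produce a direct GR<->FR XFmode move.  */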
1627 /* Expand the movxf or movrf pattern (MODE says which) with the given
1628 OPERANDS, returning true if the pattern should then invoke
1632 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1634 rtx op0 = operands[0];
1636 if (GET_CODE (op0) == SUBREG)
1637 op0 = SUBREG_REG (op0);
1639 /* We must support XFmode loads into general registers for stdarg/vararg,
1640 unprototyped calls, and a rare case where a long double is passed as
1641 an argument after a float HFA fills the FP registers. We split them into
1642 DImode loads for convenience. We also need to support XFmode stores
1643 for the last case. This case does not happen for stdarg/vararg routines,
1644 because we do a block store to memory of unnamed arguments. */
1646 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1650 /* We're hoping to transform everything that deals with XFmode
1651 quantities and GR registers early in the compiler. */
1652 gcc_assert (can_create_pseudo_p ());
1654 /* Struct to register can just use TImode instead. */
1655 if ((GET_CODE (operands[1]) == SUBREG
1656 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1657 || (GET_CODE (operands[1]) == REG
1658 && GR_REGNO_P (REGNO (operands[1]))))
1660 rtx op1 = operands[1];
1662 if (GET_CODE (op1) == SUBREG)
1663 op1 = SUBREG_REG (op1);
1665 op1 = gen_rtx_REG (TImode, REGNO (op1));
1667 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1671 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1673 /* Don't word-swap when reading in the constant. */
1674 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1675 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1677 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1678 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1683 /* If the quantity is in a register not known to be GR, spill it. */
1684 if (register_operand (operands[1], mode))
1685 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1687 gcc_assert (GET_CODE (operands[1]) == MEM);
1689 /* Don't word-swap when reading in the value. */
1690 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1691 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1693 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1694 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1698 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1700 /* We're hoping to transform everything that deals with XFmode
1701 quantities and GR registers early in the compiler. */
1702 gcc_assert (can_create_pseudo_p ());
1704 /* Op0 can't be a GR_REG here, as that case is handled above.
1705 If op0 is a register, then we spill op1, so that we now have a
1706 MEM operand. This requires creating an XFmode subreg of a TImode reg
1707 to force the spill. */
1708 if (register_operand (operands[0], mode))
1710 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1711 op1 = gen_rtx_SUBREG (mode, op1, 0);
1712 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1719 gcc_assert (GET_CODE (operands[0]) == MEM);
1721 /* Don't word-swap when writing out the value. */
1722 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1723 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1725 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1726 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1731 if (!reload_in_progress && !reload_completed)
1733 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1735 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1737 rtx memt, memx, in = operands[1];
1738 if (CONSTANT_P (in))
1739 in = validize_mem (force_const_mem (mode, in));
1740 if (GET_CODE (in) == MEM)
1741 memt = adjust_address (in, TImode, 0);
1744 memt = assign_stack_temp (TImode, 16, 0);
1745 memx = adjust_address (memt, mode, 0);
1746 emit_move_insn (memx, in);
1748 emit_move_insn (op0, memt);
1752 if (!ia64_move_ok (operands[0], operands[1]))
1753 operands[1] = force_reg (mode, operands[1]);
1759 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1760 with the expression that holds the compare result (in VOIDmode). */
1762 static GTY(()) rtx cmptf_libfunc;
1765 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1767 enum rtx_code code = GET_CODE (*expr);
1770 /* If we have a BImode input, then we already have a compare result, and
1771 do not need to emit another comparison. */
1772 if (GET_MODE (*op0) == BImode)
1774 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1777 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, which indicates what to do.
1779 The return value is an integer to be compared against zero. */
1780 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1783 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1790 enum rtx_code ncode;
1793 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1796 /* 1 = equal, 0 = not equal. Equality operators do
1797 not raise FP_INVALID when given an SNaN operand. */
1798 case EQ: magic = QCMP_EQ; ncode = NE; break;
1799 case NE: magic = QCMP_EQ; ncode = EQ; break;
1800 /* isunordered() from C99. */
1801 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1802 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1803 /* Relational operators raise FP_INVALID when given
1805 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1806 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1807 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1808 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1809 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
1811 for this to be useful. */
1812 default: gcc_unreachable ();
1817 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1818 *op0, TFmode, *op1, TFmode,
1819 GEN_INT (magic), DImode);
1820 cmp = gen_reg_rtx (BImode);
1821 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1822 gen_rtx_fmt_ee (ncode, BImode,
1825 insns = get_insns ();
1828 emit_libcall_block (insns, cmp, cmp,
1829 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1834 cmp = gen_reg_rtx (BImode);
1835 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1836 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1840 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
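/* For illustration of the HP-UX TFmode path above: a LT comparison is
   lowered to
     ret = _U_Qfcmp (op0, op1, QCMP_LT | QCMP_INV);
     cmp = (ret != 0);
   so the BImode result keeps the sense of the original comparison,
   while the QCMP_INV flag makes the library raise FP_INVALID for NaN
   operands as the relational operators require.  */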
1845 /* Generate an integral vector comparison. Return true if the condition has
1846 been reversed, and so the sense of the comparison should be inverted. */
1849 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1850 rtx dest, rtx op0, rtx op1)
1852 bool negate = false;
1855 /* Canonicalize the comparison to EQ, GT, GTU. */
1866 code = reverse_condition (code);
1872 code = reverse_condition (code);
1878 code = swap_condition (code);
1879 x = op0, op0 = op1, op1 = x;
1886 /* Unsigned parallel compare is not supported by the hardware. Play some
1887 tricks to turn this into a signed comparison against 0. */
1896 /* Subtract (-(INT MAX) - 1) from both operands to make
1898 mask = GEN_INT (0x80000000);
1899 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1900 mask = force_reg (mode, mask);
1901 t1 = gen_reg_rtx (mode);
1902 emit_insn (gen_subv2si3 (t1, op0, mask));
1903 t2 = gen_reg_rtx (mode);
1904 emit_insn (gen_subv2si3 (t2, op1, mask));
1913 /* Perform a parallel unsigned saturating subtraction. */
1914 x = gen_reg_rtx (mode);
1915 emit_insn (gen_rtx_SET (VOIDmode, x,
1916 gen_rtx_US_MINUS (mode, op0, op1)));
1920 op1 = CONST0_RTX (mode);
1929 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
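/* Worked example for the GTU handling above: for V2SImode, subtracting
   0x80000000 from both operands maps the unsigned range 0 .. 0xffffffff
   onto the signed range -0x80000000 .. 0x7fffffff while preserving the
   ordering, so the signed GT emitted above gives the unsigned answer.
   For the other modes the saturating difference (op0 -us op1) computed
   above is nonzero exactly when op0 >u op1, so an equality test against
   zero, with the result sense inverted, gives the same predicate.  */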
1935 /* Emit an integral vector conditional move. */
1938 ia64_expand_vecint_cmov (rtx operands[])
1940 enum machine_mode mode = GET_MODE (operands[0]);
1941 enum rtx_code code = GET_CODE (operands[3]);
1945 cmp = gen_reg_rtx (mode);
1946 negate = ia64_expand_vecint_compare (code, mode, cmp,
1947 operands[4], operands[5]);
1949 ot = operands[1+negate];
1950 of = operands[2-negate];
1952 if (ot == CONST0_RTX (mode))
1954 if (of == CONST0_RTX (mode))
1956 emit_move_insn (operands[0], ot);
1960 x = gen_rtx_NOT (mode, cmp);
1961 x = gen_rtx_AND (mode, x, of);
1962 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1964 else if (of == CONST0_RTX (mode))
1966 x = gen_rtx_AND (mode, cmp, ot);
1967 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1973 t = gen_reg_rtx (mode);
1974 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1975 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1977 f = gen_reg_rtx (mode);
1978 x = gen_rtx_NOT (mode, cmp);
1979 x = gen_rtx_AND (mode, x, operands[2-negate]);
1980 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1982 x = gen_rtx_IOR (mode, t, f);
1983 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1987 /* Emit an integral vector min or max operation. Return true if all done. */
1990 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1995 /* These four combinations are supported directly. */
1996 if (mode == V8QImode && (code == UMIN || code == UMAX))
1998 if (mode == V4HImode && (code == SMIN || code == SMAX))
2001 /* This combination can be implemented with only saturating subtraction. */
2002 if (mode == V4HImode && code == UMAX)
2004 rtx x, tmp = gen_reg_rtx (mode);
2006 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2007 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
2009 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2013 /* Everything else implemented via vector comparisons. */
2014 xops[0] = operands[0];
2015 xops[4] = xops[1] = operands[1];
2016 xops[5] = xops[2] = operands[2];
2035 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2037 ia64_expand_vecint_cmov (xops);
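/* Worked example for the V4HImode UMAX case above (an added note): the
   unsigned saturating subtract gives tmp = a - b when a > b and 0
   otherwise, so tmp + b is a in the first case and b in the second,
   i.e. umax (a, b), with no unsigned compare needed.  */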
2041 /* The vectors LO and HI each contain N halves of a double-wide vector.
2042 Reassemble either the first N/2 or the second N/2 elements. */
2045 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2047 enum machine_mode vmode = GET_MODE (lo);
2048 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2049 struct expand_vec_perm_d d;
2052 d.target = gen_lowpart (vmode, out);
2053 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2054 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2057 d.one_operand_p = false;
2058 d.testing_p = false;
2060 high = (highp ? nelt / 2 : 0);
2061 for (i = 0; i < nelt / 2; ++i)
2063 d.perm[i * 2] = i + high;
2064 d.perm[i * 2 + 1] = i + high + nelt;
2067 ok = ia64_expand_vec_perm_const_1 (&d);
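/* Illustration of the permutation built above (an added example): for a
   V8QImode input nelt is 8, so the low-half case uses the selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } and the high-half case
   { 4, 12, 5, 13, 6, 14, 7, 15 }, i.e. an interleave of corresponding
   elements of the two operands (indices >= nelt select from d.op1; op0 and
   op1 are pre-swapped for big-endian).  */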
2071 /* Return a vector of the sign-extension of VEC. */
2074 ia64_unpack_sign (rtx vec, bool unsignedp)
2076 enum machine_mode mode = GET_MODE (vec);
2077 rtx zero = CONST0_RTX (mode);
2083 rtx sign = gen_reg_rtx (mode);
2086 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2093 /* Emit an integral vector unpack operation. */
2096 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2098 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2099 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2102 /* Emit an integral vector widening sum operation. */
2105 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2107 enum machine_mode wmode;
2110 sign = ia64_unpack_sign (operands[1], unsignedp);
2112 wmode = GET_MODE (operands[0]);
2113 l = gen_reg_rtx (wmode);
2114 h = gen_reg_rtx (wmode);
2116 ia64_unpack_assemble (l, operands[1], sign, false);
2117 ia64_unpack_assemble (h, operands[1], sign, true);
2119 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2120 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2121 if (t != operands[0])
2122 emit_move_insn (operands[0], t);
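/* An added note on the widening sum above: SIGN is either the zero vector
   or the per-element sign mask of operands[1], so the two unpack/assemble
   calls yield the low and high halves of operands[1] widened with the
   proper extension; both halves are then added to the accumulator
   operands[2], leaving the total in operands[0].  */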
2125 /* Emit a signed or unsigned V8QI dot product operation. */
2128 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2130 rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2131 rtx p1, p2, p3, p4, s1, s2, s3;
2135 sn1 = ia64_unpack_sign (op1, unsignedp);
2136 sn2 = ia64_unpack_sign (op2, unsignedp);
2138 l1 = gen_reg_rtx (V4HImode);
2139 l2 = gen_reg_rtx (V4HImode);
2140 h1 = gen_reg_rtx (V4HImode);
2141 h2 = gen_reg_rtx (V4HImode);
2142 ia64_unpack_assemble (l1, op1, sn1, false);
2143 ia64_unpack_assemble (l2, op2, sn2, false);
2144 ia64_unpack_assemble (h1, op1, sn1, true);
2145 ia64_unpack_assemble (h2, op2, sn2, true);
2147 p1 = gen_reg_rtx (V2SImode);
2148 p2 = gen_reg_rtx (V2SImode);
2149 p3 = gen_reg_rtx (V2SImode);
2150 p4 = gen_reg_rtx (V2SImode);
2151 emit_insn (gen_pmpy2_even (p1, l1, l2));
2152 emit_insn (gen_pmpy2_even (p2, h1, h2));
2153 emit_insn (gen_pmpy2_odd (p3, l1, l2));
2154 emit_insn (gen_pmpy2_odd (p4, h1, h2));
2156 s1 = gen_reg_rtx (V2SImode);
2157 s2 = gen_reg_rtx (V2SImode);
2158 s3 = gen_reg_rtx (V2SImode);
2159 emit_insn (gen_addv2si3 (s1, p1, p2));
2160 emit_insn (gen_addv2si3 (s2, p3, p4));
2161 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2162 emit_insn (gen_addv2si3 (operands[0], s2, s3));
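/* Summary of the dot product above (a reading of the code): both V8QI
   operands are widened to V4HI low/high halves with the appropriate
   extension, the even and odd 16-bit lanes are multiplied into four V2SI
   partial products with the pmpy2 patterns, and the four products plus the
   V2SI accumulator operands[3] are summed into operands[0].  */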
2165 /* Emit the appropriate sequence for a call. */
2168 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2173 addr = XEXP (addr, 0);
2174 addr = convert_memory_address (DImode, addr);
2175 b0 = gen_rtx_REG (DImode, R_BR (0));
2177 /* ??? Should do this for functions known to bind local too. */
2178 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2181 insn = gen_sibcall_nogp (addr);
2183 insn = gen_call_nogp (addr, b0);
2185 insn = gen_call_value_nogp (retval, addr, b0);
2186 insn = emit_call_insn (insn);
2191 insn = gen_sibcall_gp (addr);
2193 insn = gen_call_gp (addr, b0);
2195 insn = gen_call_value_gp (retval, addr, b0);
2196 insn = emit_call_insn (insn);
2198 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2202 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2204 if (TARGET_ABI_OPEN_VMS)
2205 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2206 gen_rtx_REG (DImode, GR_REG (25)));
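/* An added note on the call forms above: under TARGET_NO_PIC/TARGET_AUTO_PIC
   the nogp variants are emitted; otherwise the gp variants are used and the
   GP register is recorded in CALL_INSN_FUNCTION_USAGE.  The return-address
   register b0 and, for TARGET_ABI_OPEN_VMS, r25 are recorded there as well
   so that dataflow sees them as used by the call (the exact guard for the
   b0 case is in lines elided here).  */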
2210 reg_emitted (enum ia64_frame_regs r)
2212 if (emitted_frame_related_regs[r] == 0)
2213 emitted_frame_related_regs[r] = current_frame_info.r[r];
2215 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2219 get_reg (enum ia64_frame_regs r)
2222 return current_frame_info.r[r];
2226 is_emitted (int regno)
2230 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2231 if (emitted_frame_related_regs[r] == regno)
2237 ia64_reload_gp (void)
2241 if (current_frame_info.r[reg_save_gp])
2243 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2247 HOST_WIDE_INT offset;
2250 offset = (current_frame_info.spill_cfa_off
2251 + current_frame_info.spill_size);
2252 if (frame_pointer_needed)
2254 tmp = hard_frame_pointer_rtx;
2259 tmp = stack_pointer_rtx;
2260 offset = current_frame_info.total_size - offset;
2263 offset_r = GEN_INT (offset);
2264 if (satisfies_constraint_I (offset_r))
2265 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2268 emit_move_insn (pic_offset_table_rtx, offset_r);
2269 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2270 pic_offset_table_rtx, tmp));
2273 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2276 emit_move_insn (pic_offset_table_rtx, tmp);
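/* Summary of the GP reload above (an interpretation): when the prologue
   kept GP in a general register (reg_save_gp) it is copied back from there;
   otherwise GP was spilled to the stack at spill_cfa_off + spill_size, so
   the slot's address is rebuilt from the frame or stack pointer and GP is
   reloaded from that memory location.  */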
2280 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2281 rtx scratch_b, int noreturn_p, int sibcall_p)
2284 bool is_desc = false;
2286 /* If we find we're calling through a register, then we're actually
2287 calling through a descriptor, so load up the values. */
2288 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2293 /* ??? We are currently constrained to *not* use peep2, because
2294 we can legitimately change the global lifetime of the GP
2295 (in the form of killing where previously live). This is
2296 because a call through a descriptor doesn't use the previous
2297 value of the GP, while a direct call does, and we do not
2298 commit to either form until the split here.
2300 That said, this means that we lack precise life info for
2301 whether ADDR is dead after this call. This is not terribly
2302 important, since we can fix things up essentially for free
2303 with the POST_DEC below, but it's nice to not use it when we
2304 can immediately tell it's not necessary. */
2305 addr_dead_p = ((noreturn_p || sibcall_p
2306 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2308 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2310 /* Load the code address into scratch_b. */
2311 tmp = gen_rtx_POST_INC (Pmode, addr);
2312 tmp = gen_rtx_MEM (Pmode, tmp);
2313 emit_move_insn (scratch_r, tmp);
2314 emit_move_insn (scratch_b, scratch_r);
2316 /* Load the GP address. If ADDR is not dead here, then we must
2317 revert the change made above via the POST_INCREMENT. */
2319 tmp = gen_rtx_POST_DEC (Pmode, addr);
2322 tmp = gen_rtx_MEM (Pmode, tmp);
2323 emit_move_insn (pic_offset_table_rtx, tmp);
2330 insn = gen_sibcall_nogp (addr);
2332 insn = gen_call_value_nogp (retval, addr, retaddr);
2334 insn = gen_call_nogp (addr, retaddr);
2335 emit_call_insn (insn);
2337 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2341 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2343 This differs from the generic code in that we know about the zero-extending
2344 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2345 also know that ld.acq+cmpxchg.rel equals a full barrier.
2347 The loop we want to generate looks like
2352 new_reg = cmp_reg op val;
2353 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2354 if (cmp_reg != old_reg)
2357 Note that we only do the plain load from memory once. Subsequent
2358 iterations use the value loaded by the compare-and-swap pattern. */
2361 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2362 rtx old_dst, rtx new_dst, enum memmodel model)
2364 enum machine_mode mode = GET_MODE (mem);
2365 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2366 enum insn_code icode;
2368 /* Special case for using fetchadd. */
2369 if ((mode == SImode || mode == DImode)
2370 && (code == PLUS || code == MINUS)
2371 && fetchadd_operand (val, mode))
2374 val = GEN_INT (-INTVAL (val));
2377 old_dst = gen_reg_rtx (mode);
2381 case MEMMODEL_ACQ_REL:
2382 case MEMMODEL_SEQ_CST:
2383 emit_insn (gen_memory_barrier ());
2385 case MEMMODEL_RELAXED:
2386 case MEMMODEL_ACQUIRE:
2387 case MEMMODEL_CONSUME:
2389 icode = CODE_FOR_fetchadd_acq_si;
2391 icode = CODE_FOR_fetchadd_acq_di;
2393 case MEMMODEL_RELEASE:
2395 icode = CODE_FOR_fetchadd_rel_si;
2397 icode = CODE_FOR_fetchadd_rel_di;
2404 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2408 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2410 if (new_reg != new_dst)
2411 emit_move_insn (new_dst, new_reg);
2416 /* Because of the volatile mem read, we get an ld.acq, which is the
2417 front half of the full barrier. The end half is the cmpxchg.rel.
2418 For relaxed and release memory models, we don't need this. But we
2419    don't bother trying to prevent it either. */
2420 gcc_assert (model == MEMMODEL_RELAXED
2421 || model == MEMMODEL_RELEASE
2422 || MEM_VOLATILE_P (mem));
2424 old_reg = gen_reg_rtx (DImode);
2425 cmp_reg = gen_reg_rtx (DImode);
2426 label = gen_label_rtx ();
2430 val = simplify_gen_subreg (DImode, val, mode, 0);
2431 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2434 emit_move_insn (cmp_reg, mem);
2438 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2439 emit_move_insn (old_reg, cmp_reg);
2440 emit_move_insn (ar_ccv, cmp_reg);
2443 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2448 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2449 true, OPTAB_DIRECT);
2450 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2453 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2454 true, OPTAB_DIRECT);
2457 new_reg = gen_lowpart (mode, new_reg);
2459 emit_move_insn (new_dst, new_reg);
2463 case MEMMODEL_RELAXED:
2464 case MEMMODEL_ACQUIRE:
2465 case MEMMODEL_CONSUME:
2468 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2469 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2470 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2471 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2477 case MEMMODEL_RELEASE:
2478 case MEMMODEL_ACQ_REL:
2479 case MEMMODEL_SEQ_CST:
2482 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2483 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2484 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2485 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2495 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2497 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
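/* Rough illustration of the cmpxchg loop generated above (an added sketch,
   not the exact template; standard IA-64 mnemonics assumed), for an SImode
   operation:

	ld4.acq      r_cmp = [mem]
   .loop:
	mov          r_old = r_cmp
	mov          ar.ccv = r_cmp
	<op>         r_new = r_cmp, val
	cmpxchg4.rel r_cmp = [mem], r_new, ar.ccv
	cmp.ne       p6, p7 = r_cmp, r_old
   (p6)	br.cond.dptk .loop

   The .acq/.rel completer on the cmpxchg follows MODEL as in the switch
   above.  For PLUS/MINUS with an immediate accepted by fetchadd_operand,
   the whole loop is replaced by a single fetchadd4/fetchadd8 as in the
   special case at the top of the function.  */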
2500 /* Begin the assembly file. */
2503 ia64_file_start (void)
2505 default_file_start ();
2506 emit_safe_across_calls ();
2510 emit_safe_across_calls (void)
2512 unsigned int rs, re;
2519 while (rs < 64 && call_used_regs[PR_REG (rs)])
2523 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2527 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2531 fputc (',', asm_out_file);
2533 fprintf (asm_out_file, "p%u", rs);
2535 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2539 fputc ('\n', asm_out_file);
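/* Example of the directive emitted above (illustrative only; the exact
   ranges depend on call_used_regs), for the usual ABI something like:

	.pred.safe_across_calls p1-p5,p16-p63

   i.e. the predicate registers that are preserved across calls.  */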
2542 /* Globalize a declaration. */
2545 ia64_globalize_decl_name (FILE * stream, tree decl)
2547 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2548 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2551 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2552 const char *p = TREE_STRING_POINTER (v);
2553 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2555 targetm.asm_out.globalize_label (stream, name);
2556 if (TREE_CODE (decl) == FUNCTION_DECL)
2557 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
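/* An added example of the version_id handling above, for a hypothetical
   function foo declared with __attribute__ ((version_id ("1.2"))): the
   assembly output would contain

	.alias foo#, "foo{1.2}"

   followed by the globalization and, for functions, the type directive.  */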
2560 /* Helper function for ia64_compute_frame_size: find an appropriate general
2561    register to spill some special register to.  current_frame_info.gr_used_mask
2562    contains bits in GR0 to GR31 that have already been allocated by this routine.
2563 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2566 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2570 if (emitted_frame_related_regs[r] != 0)
2572 regno = emitted_frame_related_regs[r];
2573 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2574 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2575 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2576 else if (current_function_is_leaf
2577 && regno >= GR_REG (1) && regno <= GR_REG (31))
2578 current_frame_info.gr_used_mask |= 1 << regno;
2583 /* If this is a leaf function, first try an otherwise unused
2584 call-clobbered register. */
2585 if (current_function_is_leaf)
2587 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2588 if (! df_regs_ever_live_p (regno)
2589 && call_used_regs[regno]
2590 && ! fixed_regs[regno]
2591 && ! global_regs[regno]
2592 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2593 && ! is_emitted (regno))
2595 current_frame_info.gr_used_mask |= 1 << regno;
2602 regno = current_frame_info.n_local_regs;
2603 /* If there is a frame pointer, then we can't use loc79, because
2604 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2605 reg_name switching code in ia64_expand_prologue. */
2606 while (regno < (80 - frame_pointer_needed))
2607 if (! is_emitted (LOC_REG (regno++)))
2609 current_frame_info.n_local_regs = regno;
2610 return LOC_REG (regno - 1);
2614 /* Failed to find a general register to spill to. Must use stack. */
2618 /* In order to make for nice schedules, we try to allocate every temporary
2619 to a different register. We must of course stay away from call-saved,
2620 fixed, and global registers. We must also stay away from registers
2621 allocated in current_frame_info.gr_used_mask, since those include regs
2622 used all through the prologue.
2624 Any register allocated here must be used immediately. The idea is to
2625 aid scheduling, not to solve data flow problems. */
2627 static int last_scratch_gr_reg;
2630 next_scratch_gr_reg (void)
2634 for (i = 0; i < 32; ++i)
2636 regno = (last_scratch_gr_reg + i + 1) & 31;
2637 if (call_used_regs[regno]
2638 && ! fixed_regs[regno]
2639 && ! global_regs[regno]
2640 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2642 last_scratch_gr_reg = regno;
2647 /* There must be _something_ available. */
2651 /* Helper function for ia64_compute_frame_size, called through
2652 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2655 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2657 unsigned int regno = REGNO (reg);
2660 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2661 for (i = 0; i < n; ++i)
2662 current_frame_info.gr_used_mask |= 1 << (regno + i);
2667 /* Returns the number of bytes offset between the frame pointer and the stack
2668 pointer for the current function. SIZE is the number of bytes of space
2669 needed for local variables. */
2672 ia64_compute_frame_size (HOST_WIDE_INT size)
2674 HOST_WIDE_INT total_size;
2675 HOST_WIDE_INT spill_size = 0;
2676 HOST_WIDE_INT extra_spill_size = 0;
2677 HOST_WIDE_INT pretend_args_size;
2680 int spilled_gr_p = 0;
2681 int spilled_fr_p = 0;
2687 if (current_frame_info.initialized)
2690   memset (&current_frame_info, 0, sizeof current_frame_info);
2691 CLEAR_HARD_REG_SET (mask);
2693 /* Don't allocate scratches to the return register. */
2694 diddle_return_value (mark_reg_gr_used_mask, NULL);
2696 /* Don't allocate scratches to the EH scratch registers. */
2697 if (cfun->machine->ia64_eh_epilogue_sp)
2698 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2699 if (cfun->machine->ia64_eh_epilogue_bsp)
2700 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2702 /* Find the size of the register stack frame. We have only 80 local
2703 registers, because we reserve 8 for the inputs and 8 for the
2706 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2707 since we'll be adjusting that down later. */
2708 regno = LOC_REG (78) + ! frame_pointer_needed;
2709 for (; regno >= LOC_REG (0); regno--)
2710 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2712 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2714 /* For functions marked with the syscall_linkage attribute, we must mark
2715 all eight input registers as in use, so that locals aren't visible to
2718 if (cfun->machine->n_varargs > 0
2719 || lookup_attribute ("syscall_linkage",
2720 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2721 current_frame_info.n_input_regs = 8;
2724 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2725 if (df_regs_ever_live_p (regno))
2727 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2730 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2731 if (df_regs_ever_live_p (regno))
2733 i = regno - OUT_REG (0) + 1;
2735 #ifndef PROFILE_HOOK
2736 /* When -p profiling, we need one output register for the mcount argument.
2737 Likewise for -a profiling for the bb_init_func argument. For -ax
2738 profiling, we need two output registers for the two bb_init_trace_func
2743 current_frame_info.n_output_regs = i;
2745 /* ??? No rotating register support yet. */
2746 current_frame_info.n_rotate_regs = 0;
2748 /* Discover which registers need spilling, and how much room that
2749 will take. Begin with floating point and general registers,
2750 which will always wind up on the stack. */
2752 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2753 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2755 SET_HARD_REG_BIT (mask, regno);
2761 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2762 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2764 SET_HARD_REG_BIT (mask, regno);
2770 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2771 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2773 SET_HARD_REG_BIT (mask, regno);
2778 /* Now come all special registers that might get saved in other
2779 general registers. */
2781 if (frame_pointer_needed)
2783 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2784 /* If we did not get a register, then we take LOC79. This is guaranteed
2785 to be free, even if regs_ever_live is already set, because this is
2786 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2787 as we don't count loc79 above. */
2788 if (current_frame_info.r[reg_fp] == 0)
2790 current_frame_info.r[reg_fp] = LOC_REG (79);
2791 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2795 if (! current_function_is_leaf)
2797 /* Emit a save of BR0 if we call other functions. Do this even
2798 if this function doesn't return, as EH depends on this to be
2799 able to unwind the stack. */
2800 SET_HARD_REG_BIT (mask, BR_REG (0));
2802 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2803 if (current_frame_info.r[reg_save_b0] == 0)
2805 extra_spill_size += 8;
2809 /* Similarly for ar.pfs. */
2810 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2811 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2812 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2814 extra_spill_size += 8;
2818 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2819 registers are clobbered, so we fall back to the stack. */
2820 current_frame_info.r[reg_save_gp]
2821 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2822 if (current_frame_info.r[reg_save_gp] == 0)
2824 SET_HARD_REG_BIT (mask, GR_REG (1));
2831 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2833 SET_HARD_REG_BIT (mask, BR_REG (0));
2834 extra_spill_size += 8;
2838 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2840 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2841 current_frame_info.r[reg_save_ar_pfs]
2842 = find_gr_spill (reg_save_ar_pfs, 1);
2843 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2845 extra_spill_size += 8;
2851 /* Unwind descriptor hackery: things are most efficient if we allocate
2852 consecutive GR save registers for RP, PFS, FP in that order. However,
2853 it is absolutely critical that FP get the only hard register that's
2854 guaranteed to be free, so we allocated it first. If all three did
2855 happen to be allocated hard regs, and are consecutive, rearrange them
2856 into the preferred order now.
2858 If we have already emitted code for any of those registers,
2859 then it's already too late to change. */
2860 min_regno = MIN (current_frame_info.r[reg_fp],
2861 MIN (current_frame_info.r[reg_save_b0],
2862 current_frame_info.r[reg_save_ar_pfs]));
2863 max_regno = MAX (current_frame_info.r[reg_fp],
2864 MAX (current_frame_info.r[reg_save_b0],
2865 current_frame_info.r[reg_save_ar_pfs]));
2867 && min_regno + 2 == max_regno
2868 && (current_frame_info.r[reg_fp] == min_regno + 1
2869 || current_frame_info.r[reg_save_b0] == min_regno + 1
2870 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2871 && (emitted_frame_related_regs[reg_save_b0] == 0
2872 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2873 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2874 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2875 && (emitted_frame_related_regs[reg_fp] == 0
2876 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2878 current_frame_info.r[reg_save_b0] = min_regno;
2879 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2880 current_frame_info.r[reg_fp] = min_regno + 2;
2883 /* See if we need to store the predicate register block. */
2884 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2885 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2887 if (regno <= PR_REG (63))
2889 SET_HARD_REG_BIT (mask, PR_REG (0));
2890 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2891 if (current_frame_info.r[reg_save_pr] == 0)
2893 extra_spill_size += 8;
2897 /* ??? Mark them all as used so that register renaming and such
2898 are free to use them. */
2899 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2900 df_set_regs_ever_live (regno, true);
2903 /* If we're forced to use st8.spill, we're forced to save and restore
2904 ar.unat as well. The check for existing liveness allows inline asm
2905 to touch ar.unat. */
2906 if (spilled_gr_p || cfun->machine->n_varargs
2907 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2909 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2910 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2911 current_frame_info.r[reg_save_ar_unat]
2912 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2913 if (current_frame_info.r[reg_save_ar_unat] == 0)
2915 extra_spill_size += 8;
2920 if (df_regs_ever_live_p (AR_LC_REGNUM))
2922 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2923 current_frame_info.r[reg_save_ar_lc]
2924 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2925 if (current_frame_info.r[reg_save_ar_lc] == 0)
2927 extra_spill_size += 8;
2932 /* If we have an odd number of words of pretend arguments written to
2933 the stack, then the FR save area will be unaligned. We round the
2934 size of this area up to keep things 16 byte aligned. */
2936 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2938 pretend_args_size = crtl->args.pretend_args_size;
2940 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2941 + crtl->outgoing_args_size);
2942 total_size = IA64_STACK_ALIGN (total_size);
2944 /* We always use the 16-byte scratch area provided by the caller, but
2945 if we are a leaf function, there's no one to which we need to provide
2947 if (current_function_is_leaf)
2948 total_size = MAX (0, total_size - 16);
2950 current_frame_info.total_size = total_size;
2951 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2952 current_frame_info.spill_size = spill_size;
2953 current_frame_info.extra_spill_size = extra_spill_size;
2954 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2955 current_frame_info.n_spilled = n_spilled;
2956 current_frame_info.initialized = reload_completed;
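/* A recap of the layout computed above (an added note): total_size is the
   16-byte aligned sum of the spill areas, the local variable SIZE, the
   (possibly rounded) pretend-args size and the outgoing argument area,
   reduced by the caller-provided 16-byte scratch area for leaf functions;
   spill_cfa_off is set to pretend_args_size - 16, and the chosen save
   registers live in current_frame_info.r[].  */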
2959 /* Worker function for TARGET_CAN_ELIMINATE. */
2962 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2964 return (to == BR_REG (0) ? current_function_is_leaf : true);
2967 /* Compute the initial difference between the specified pair of registers. */
2970 ia64_initial_elimination_offset (int from, int to)
2972 HOST_WIDE_INT offset;
2974 ia64_compute_frame_size (get_frame_size ());
2977 case FRAME_POINTER_REGNUM:
2980 case HARD_FRAME_POINTER_REGNUM:
2981 if (current_function_is_leaf)
2982 offset = -current_frame_info.total_size;
2984 offset = -(current_frame_info.total_size
2985 - crtl->outgoing_args_size - 16);
2988 case STACK_POINTER_REGNUM:
2989 if (current_function_is_leaf)
2992 offset = 16 + crtl->outgoing_args_size;
3000 case ARG_POINTER_REGNUM:
3001    /* Arguments start above the 16 byte save area, unless stdarg,
3002 in which case we store through the 16 byte save area. */
3005 case HARD_FRAME_POINTER_REGNUM:
3006 offset = 16 - crtl->args.pretend_args_size;
3009 case STACK_POINTER_REGNUM:
3010 offset = (current_frame_info.total_size
3011 + 16 - crtl->args.pretend_args_size);
3026 /* If there are more than a trivial number of register spills, we use
3027 two interleaved iterators so that we can get two memory references
3030 In order to simplify things in the prologue and epilogue expanders,
3031 we use helper functions to fix up the memory references after the
3032 fact with the appropriate offsets to a POST_MODIFY memory mode.
3033 The following data structure tracks the state of the two iterators
3034 while insns are being emitted. */
3036 struct spill_fill_data
3038 rtx init_after; /* point at which to emit initializations */
3039 rtx init_reg[2]; /* initial base register */
3040 rtx iter_reg[2]; /* the iterator registers */
3041 rtx *prev_addr[2]; /* address of last memory use */
3042 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
3043 HOST_WIDE_INT prev_off[2]; /* last offset */
3044 int n_iter; /* number of iterators in use */
3045 int next_iter; /* next iterator to use */
3046 unsigned int save_gr_used_mask;
3049 static struct spill_fill_data spill_fill_data;
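/* A sketch of how the iterators are used (an interpretation of the code
   below): setup_spill_pointers primes one iterator register, or two when
   there are more than two spills; spill_restore_mem hands out a MEM
   addressed by the current iterator and, once that iterator is reused,
   rewrites the previous reference into a POST_MODIFY that advances the
   register by the accumulated displacement; next_iter alternates between
   the iterators so successive spills and fills interleave.  */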
3052 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3056 spill_fill_data.init_after = get_last_insn ();
3057 spill_fill_data.init_reg[0] = init_reg;
3058 spill_fill_data.init_reg[1] = init_reg;
3059 spill_fill_data.prev_addr[0] = NULL;
3060 spill_fill_data.prev_addr[1] = NULL;
3061 spill_fill_data.prev_insn[0] = NULL;
3062 spill_fill_data.prev_insn[1] = NULL;
3063 spill_fill_data.prev_off[0] = cfa_off;
3064 spill_fill_data.prev_off[1] = cfa_off;
3065 spill_fill_data.next_iter = 0;
3066 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3068 spill_fill_data.n_iter = 1 + (n_spills > 2);
3069 for (i = 0; i < spill_fill_data.n_iter; ++i)
3071 int regno = next_scratch_gr_reg ();
3072 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3073 current_frame_info.gr_used_mask |= 1 << regno;
3078 finish_spill_pointers (void)
3080 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3084 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3086 int iter = spill_fill_data.next_iter;
3087 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3088 rtx disp_rtx = GEN_INT (disp);
3091 if (spill_fill_data.prev_addr[iter])
3093 if (satisfies_constraint_N (disp_rtx))
3095 *spill_fill_data.prev_addr[iter]
3096 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3097 gen_rtx_PLUS (DImode,
3098 spill_fill_data.iter_reg[iter],
3100 add_reg_note (spill_fill_data.prev_insn[iter],
3101 REG_INC, spill_fill_data.iter_reg[iter]);
3105 /* ??? Could use register post_modify for loads. */
3106 if (!satisfies_constraint_I (disp_rtx))
3108 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3109 emit_move_insn (tmp, disp_rtx);
3112 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3113 spill_fill_data.iter_reg[iter], disp_rtx));
3116 /* Micro-optimization: if we've created a frame pointer, it's at
3117 CFA 0, which may allow the real iterator to be initialized lower,
3118 slightly increasing parallelism. Also, if there are few saves
3119 it may eliminate the iterator entirely. */
3121 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3122 && frame_pointer_needed)
3124 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3125 set_mem_alias_set (mem, get_varargs_alias_set ());
3133 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3134 spill_fill_data.init_reg[iter]);
3139 if (!satisfies_constraint_I (disp_rtx))
3141 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3142 emit_move_insn (tmp, disp_rtx);
3146 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3147 spill_fill_data.init_reg[iter],
3154 /* Careful for being the first insn in a sequence. */
3155 if (spill_fill_data.init_after)
3156 insn = emit_insn_after (seq, spill_fill_data.init_after);
3159 rtx first = get_insns ();
3161 insn = emit_insn_before (seq, first);
3163 insn = emit_insn (seq);
3165 spill_fill_data.init_after = insn;
3168 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3170 /* ??? Not all of the spills are for varargs, but some of them are.
3171 The rest of the spills belong in an alias set of their own. But
3172 it doesn't actually hurt to include them here. */
3173 set_mem_alias_set (mem, get_varargs_alias_set ());
3175 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3176 spill_fill_data.prev_off[iter] = cfa_off;
3178 if (++iter >= spill_fill_data.n_iter)
3180 spill_fill_data.next_iter = iter;
3186 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3189 int iter = spill_fill_data.next_iter;
3192 mem = spill_restore_mem (reg, cfa_off);
3193 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3194 spill_fill_data.prev_insn[iter] = insn;
3201 RTX_FRAME_RELATED_P (insn) = 1;
3203 /* Don't even pretend that the unwind code can intuit its way
3204 through a pair of interleaved post_modify iterators. Just
3205 provide the correct answer. */
3207 if (frame_pointer_needed)
3209 base = hard_frame_pointer_rtx;
3214 base = stack_pointer_rtx;
3215 off = current_frame_info.total_size - cfa_off;
3218 add_reg_note (insn, REG_CFA_OFFSET,
3219 gen_rtx_SET (VOIDmode,
3220 gen_rtx_MEM (GET_MODE (reg),
3221 plus_constant (base, off)),
3227 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3229 int iter = spill_fill_data.next_iter;
3232 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3233 GEN_INT (cfa_off)));
3234 spill_fill_data.prev_insn[iter] = insn;
3237 /* Wrapper functions that discard the CONST_INT spill offset.  These
3238 exist so that we can give gr_spill/gr_fill the offset they need and
3239 use a consistent function interface. */
3242 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3244 return gen_movdi (dest, src);
3248 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3250 return gen_fr_spill (dest, src);
3254 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3256 return gen_fr_restore (dest, src);
3259 /* Called after register allocation to add any instructions needed for the
3260 prologue. Using a prologue insn is favored compared to putting all of the
3261 instructions in output_function_prologue(), since it allows the scheduler
3262 to intermix instructions with the saves of the caller saved registers. In
3263 some cases, it might be necessary to emit a barrier instruction as the last
3264 insn to prevent such scheduling.
3266 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3267 so that the debug info generation code can handle them properly.
3269    The register save area is laid out like so:
3271 [ varargs spill area ]
3272 [ fr register spill area ]
3273 [ br register spill area ]
3274 [ ar register spill area ]
3275 [ pr register spill area ]
3276 [ gr register spill area ] */
3278 /* ??? We get inefficient code when the frame size is larger than can fit in an
3279 adds instruction. */
3282 ia64_expand_prologue (void)
3284 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3285 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3288 ia64_compute_frame_size (get_frame_size ());
3289 last_scratch_gr_reg = 15;
3291 if (flag_stack_usage_info)
3292 current_function_static_stack_size = current_frame_info.total_size;
3296 fprintf (dump_file, "ia64 frame related registers "
3297 "recorded in current_frame_info.r[]:\n");
3298 #define PRINTREG(a) if (current_frame_info.r[a]) \
3299 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3301 PRINTREG(reg_save_b0);
3302 PRINTREG(reg_save_pr);
3303 PRINTREG(reg_save_ar_pfs);
3304 PRINTREG(reg_save_ar_unat);
3305 PRINTREG(reg_save_ar_lc);
3306 PRINTREG(reg_save_gp);
3310 /* If there is no epilogue, then we don't need some prologue insns.
3311 We need to avoid emitting the dead prologue insns, because flow
3312 will complain about them. */
3318 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3319 if ((e->flags & EDGE_FAKE) == 0
3320 && (e->flags & EDGE_FALLTHRU) != 0)
3322 epilogue_p = (e != NULL);
3327 /* Set the local, input, and output register names. We need to do this
3328 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3329 half. If we use in/loc/out register names, then we get assembler errors
3330 in crtn.S because there is no alloc insn or regstk directive in there. */
3331 if (! TARGET_REG_NAMES)
3333 int inputs = current_frame_info.n_input_regs;
3334 int locals = current_frame_info.n_local_regs;
3335 int outputs = current_frame_info.n_output_regs;
3337 for (i = 0; i < inputs; i++)
3338 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3339 for (i = 0; i < locals; i++)
3340 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3341 for (i = 0; i < outputs; i++)
3342 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3345 /* Set the frame pointer register name. The regnum is logically loc79,
3346 but of course we'll not have allocated that many locals. Rather than
3347 worrying about renumbering the existing rtxs, we adjust the name. */
3348 /* ??? This code means that we can never use one local register when
3349 there is a frame pointer. loc79 gets wasted in this case, as it is
3350 renamed to a register that will never be used. See also the try_locals
3351 code in find_gr_spill. */
3352 if (current_frame_info.r[reg_fp])
3354 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3355 reg_names[HARD_FRAME_POINTER_REGNUM]
3356 = reg_names[current_frame_info.r[reg_fp]];
3357 reg_names[current_frame_info.r[reg_fp]] = tmp;
3360 /* We don't need an alloc instruction if we've used no outputs or locals. */
3361 if (current_frame_info.n_local_regs == 0
3362 && current_frame_info.n_output_regs == 0
3363 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3364 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3366 /* If there is no alloc, but there are input registers used, then we
3367 need a .regstk directive. */
3368 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3369 ar_pfs_save_reg = NULL_RTX;
3373 current_frame_info.need_regstk = 0;
3375 if (current_frame_info.r[reg_save_ar_pfs])
3377 regno = current_frame_info.r[reg_save_ar_pfs];
3378 reg_emitted (reg_save_ar_pfs);
3381 regno = next_scratch_gr_reg ();
3382 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3384 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3385 GEN_INT (current_frame_info.n_input_regs),
3386 GEN_INT (current_frame_info.n_local_regs),
3387 GEN_INT (current_frame_info.n_output_regs),
3388 GEN_INT (current_frame_info.n_rotate_regs)));
3389 if (current_frame_info.r[reg_save_ar_pfs])
3391 RTX_FRAME_RELATED_P (insn) = 1;
3392 add_reg_note (insn, REG_CFA_REGISTER,
3393 gen_rtx_SET (VOIDmode,
3395 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3399 /* Set up frame pointer, stack pointer, and spill iterators. */
3401 n_varargs = cfun->machine->n_varargs;
3402 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3403 stack_pointer_rtx, 0);
3405 if (frame_pointer_needed)
3407 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3408 RTX_FRAME_RELATED_P (insn) = 1;
3410 /* Force the unwind info to recognize this as defining a new CFA,
3411 rather than some temp register setup. */
3412 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3415 if (current_frame_info.total_size != 0)
3417 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3420 if (satisfies_constraint_I (frame_size_rtx))
3421 offset = frame_size_rtx;
3424 regno = next_scratch_gr_reg ();
3425 offset = gen_rtx_REG (DImode, regno);
3426 emit_move_insn (offset, frame_size_rtx);
3429 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3430 stack_pointer_rtx, offset));
3432 if (! frame_pointer_needed)
3434 RTX_FRAME_RELATED_P (insn) = 1;
3435 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3436 gen_rtx_SET (VOIDmode,
3438 gen_rtx_PLUS (DImode,
3443 /* ??? At this point we must generate a magic insn that appears to
3444 modify the stack pointer, the frame pointer, and all spill
3445 iterators. This would allow the most scheduling freedom. For
3446 now, just hard stop. */
3447 emit_insn (gen_blockage ());
3450 /* Must copy out ar.unat before doing any integer spills. */
3451 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3453 if (current_frame_info.r[reg_save_ar_unat])
3456 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3457 reg_emitted (reg_save_ar_unat);
3461 alt_regno = next_scratch_gr_reg ();
3462 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3463 current_frame_info.gr_used_mask |= 1 << alt_regno;
3466 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3467 insn = emit_move_insn (ar_unat_save_reg, reg);
3468 if (current_frame_info.r[reg_save_ar_unat])
3470 RTX_FRAME_RELATED_P (insn) = 1;
3471 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3474 /* Even if we're not going to generate an epilogue, we still
3475 need to save the register so that EH works. */
3476 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3477 emit_insn (gen_prologue_use (ar_unat_save_reg));
3480 ar_unat_save_reg = NULL_RTX;
3482 /* Spill all varargs registers. Do this before spilling any GR registers,
3483 since we want the UNAT bits for the GR registers to override the UNAT
3484 bits from varargs, which we don't care about. */
3487 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3489 reg = gen_rtx_REG (DImode, regno);
3490 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3493 /* Locate the bottom of the register save area. */
3494 cfa_off = (current_frame_info.spill_cfa_off
3495 + current_frame_info.spill_size
3496 + current_frame_info.extra_spill_size);
3498 /* Save the predicate register block either in a register or in memory. */
3499 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3501 reg = gen_rtx_REG (DImode, PR_REG (0));
3502 if (current_frame_info.r[reg_save_pr] != 0)
3504 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3505 reg_emitted (reg_save_pr);