/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
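/* Background note (added for clarity; not in the original source): the
   three choices correspond to the immediate widths of the IA-64 add
   instructions -- adds takes a 14-bit immediate, addl a 22-bit
   immediate, and movl a full 64-bit one -- hence the valid -mtls-size
   values of 14, 22, and 64.  */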

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					  HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_aix_select_section PARAMS ((tree, int,
					     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_aix_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}
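/* Note (added; an assumption, not from the original source): the 's'
   tested above is the small-data marker that ia64_encode_section_info
   writes after ENCODE_SECTION_INFO_CHAR at the front of a symbol's
   name when the object qualifies for the sdata/sbss sections.  */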

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
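/* Worked example (added; not from the original source): for sym+0x12345
   the low 14 bits are 0x2345, so the predicate rejects it and the
   offset must be materialized separately; only offsets that are
   multiples of 0x4000 (16 KB) get their own GOT entries, keeping the
   number of distinct entries per symbol small.  */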

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
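/* Example (added for illustration; not in the original source): a
   comparison such as "x < C" may later be canonicalized into
   "x <= C-1", switching between the adjusted (L) and plain 8-bit (K)
   immediate forms; since the final condition is unknown when the
   compare is emitted, only constants passing both checks are safe.  */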

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}
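/* Note (added): these are exactly the scale factors the shladd
   instruction can express, since its count field shifts the first
   operand left by 1 to 4 bit positions (multipliers 2, 4, 8, 16).  */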

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}
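/* Note (added): the fetchadd instruction encodes its increment in a
   3-bit field that can represent only these eight values; any other
   atomic addend has to be implemented with a compare-and-swap loop
   instead.  */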

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     here.  */
  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
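/* Worked example (added; not from the original source): rop = 0x7f8
   with rshift = 3 shifts down to 0xff, and exact_log2 (0xff + 1) = 8,
   an 8-bit deposit field.  A mask like 0x6f8 shifts to 0xdf, which is
   not a solid block of 1's, so exact_log2 (0xe0) returns -1.  */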

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    {
      if (! scratch || ! register_operand (scratch, DImode))
	temp = gen_reg_rtx (DImode);
      else
	temp = scratch;
    }
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
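
      /* Worked example (added; not from the original source):
	 ofs = 0x12345 gives lo = -0x1cbb and hi = 0x14000; hi + lo
	 reconstructs ofs, and lo fits the signed 14-bit immediate
	 range of an adds instruction.  */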
      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
#ifdef POINTERS_EXTEND_UNSIGNED
      if (GET_MODE (temp) != GET_MODE (src))
	src = convert_memory_address (GET_MODE (temp), src);
#endif
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    {
      if (GET_MODE (dest) != GET_MODE (temp))
	temp = convert_to_mode (GET_MODE (dest), temp, 0);
      emit_move_insn (dest, temp);
    }
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, op0, tga_ret, op1);
	      return NULL_RTX;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (register_operand (op0, Pmode))
		tga_ret = op0;
	      else
		tga_ret = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (tga_ret, op1));
		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
		}
	      else
		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
	      if (tga_ret == op0)
		return NULL_RTX;
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
	      if (op1 == op0)
		return NULL_RTX;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (register_operand (op0, Pmode))
		tmp = op0;
	      else
		tmp = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (tmp, op1));
		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
		}
	      else
		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
	      if (tmp == op0)
		return NULL_RTX;
	      op1 = tmp;
	      break;

	    default:
	      abort ();
	    }
	}
      else if (!TARGET_NO_PIC &&
	       (symbolic_operand (op1, Pmode) ||
		symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    if (GET_MODE (op1) == DImode)
	      emit_insn (gen_movdi_symbolic (op0, op1));
	    else
	      emit_insn (gen_movsi_symbolic (op0, op1));
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != 0)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx, dest;
  bool indirect_p;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  indirect_p = ! symbolic_operand (addr, VOIDmode);

  if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  if (gp_save)
    emit_move_insn (gp_save, pic_offset_table_rtx);

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (indirect_p)
    {
      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));
    }
  else
    dest = addr;

  if (sibcall_p)
    insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
  else if (! retval)
    insn = gen_call_pic (dest, narg_rtx, b0);
  else
    insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
  emit_call_insn (insn);

  if (gp_save)
    emit_move_insn (pic_offset_table_rtx, gp_save);
}

/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }
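
  /* Example (added for illustration): if find_gr_spill handed out
     FP = loc2, B0 = loc3, AR.PFS = loc4, the rotation above yields
     B0 = loc2, AR.PFS = loc3, FP = loc4 -- the RP, PFS, FP order
     that the unwind descriptors encode most compactly.  */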

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;
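
  /* Example (added): with 8 bytes of pretend arguments and at least
     one FR register spilled, IA64_STACK_ALIGN rounds the area up to 16
     bytes so the 16-byte FR spill slots stay aligned.  */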

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
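
/* Illustration (added; not from the original source): with two
   iterators, say r2 and r3 initialized 8 bytes apart, the expander can
   place "st8 [r2] = r16, 16" and "st8 [r3] = r17, 16" in the same insn
   group, because the two stores post-increment independent address
   registers instead of serializing on a single one.  */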

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					      REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base,
								      off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ] */
2234 /* ??? Get inefficient code when the frame size is larger than can fit in an
2235 adds instruction. */
2238 ia64_expand_prologue ()
2240 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2241 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2244 ia64_compute_frame_size (get_frame_size ());
2245 last_scratch_gr_reg = 15;
2247 /* If there is no epilogue, then we don't need some prologue insns.
2248 We need to avoid emitting the dead prologue insns, because flow
2249 will complain about them. */
2254 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2255 if ((e->flags & EDGE_FAKE) == 0
2256 && (e->flags & EDGE_FALLTHRU) != 0)
2257 break;
2258 epilogue_p = (e != NULL);
2263 /* Set the local, input, and output register names. We need to do this
2264 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2265 half. If we use in/loc/out register names, then we get assembler errors
2266 in crtn.S because there is no alloc insn or regstk directive in there. */
2267 if (! TARGET_REG_NAMES)
2269 int inputs = current_frame_info.n_input_regs;
2270 int locals = current_frame_info.n_local_regs;
2271 int outputs = current_frame_info.n_output_regs;
2273 for (i = 0; i < inputs; i++)
2274 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2275 for (i = 0; i < locals; i++)
2276 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2277 for (i = 0; i < outputs; i++)
2278 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2281 /* Set the frame pointer register name. The regnum is logically loc79,
2282 but of course we'll not have allocated that many locals. Rather than
2283 worrying about renumbering the existing rtxs, we adjust the name. */
2284 /* ??? This code means that we can never use one local register when
2285 there is a frame pointer. loc79 gets wasted in this case, as it is
2286 renamed to a register that will never be used. See also the try_locals
2287 code in find_gr_spill. */
2288 if (current_frame_info.reg_fp)
2290 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2291 reg_names[HARD_FRAME_POINTER_REGNUM]
2292 = reg_names[current_frame_info.reg_fp];
2293 reg_names[current_frame_info.reg_fp] = tmp;
2296 /* Fix up the return address placeholder. */
2297 /* ??? We can fail if __builtin_return_address is used, and we didn't
2298 allocate a register in which to save b0. I can't think of a way to
2299 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2300 then be sure that I got the right one. Further, reload doesn't seem
2301 to care if an eliminable register isn't used, and "eliminates" it anyway. */
2303 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2304 && current_frame_info.reg_save_b0 != 0)
2305 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2307 /* We don't need an alloc instruction if we've used no outputs or locals. */
2308 if (current_frame_info.n_local_regs == 0
2309 && current_frame_info.n_output_regs == 0
2310 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2312 /* If there is no alloc, but there are input registers used, then we
2313 need a .regstk directive. */
2314 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2315 ar_pfs_save_reg = NULL_RTX;
2319 current_frame_info.need_regstk = 0;
2321 if (current_frame_info.reg_save_ar_pfs)
2322 regno = current_frame_info.reg_save_ar_pfs;
2324 regno = next_scratch_gr_reg ();
2325 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2327 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2328 GEN_INT (current_frame_info.n_input_regs),
2329 GEN_INT (current_frame_info.n_local_regs),
2330 GEN_INT (current_frame_info.n_output_regs),
2331 GEN_INT (current_frame_info.n_rotate_regs)));
2332 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2335 /* Set up frame pointer, stack pointer, and spill iterators. */
2337 n_varargs = cfun->machine->n_varargs;
2338 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2339 stack_pointer_rtx, 0);
2341 if (frame_pointer_needed)
2343 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2344 RTX_FRAME_RELATED_P (insn) = 1;
2347 if (current_frame_info.total_size != 0)
2349 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2352 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2353 offset = frame_size_rtx;
2356 regno = next_scratch_gr_reg ();
2357 offset = gen_rtx_REG (DImode, regno);
2358 emit_move_insn (offset, frame_size_rtx);
2361 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2362 stack_pointer_rtx, offset));
2364 if (! frame_pointer_needed)
2366 RTX_FRAME_RELATED_P (insn) = 1;
2367 if (GET_CODE (offset) != CONST_INT)
2369 REG_NOTES (insn)
2370 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2371 gen_rtx_SET (VOIDmode,
2373 gen_rtx_PLUS (DImode,
2380 /* ??? At this point we must generate a magic insn that appears to
2381 modify the stack pointer, the frame pointer, and all spill
2382 iterators. This would allow the most scheduling freedom. For
2383 now, just hard stop. */
2384 emit_insn (gen_blockage ());
2387 /* Must copy out ar.unat before doing any integer spills. */
2388 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2390 if (current_frame_info.reg_save_ar_unat)
2391 ar_unat_save_reg
2392 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2395 alt_regno = next_scratch_gr_reg ();
2396 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2397 current_frame_info.gr_used_mask |= 1 << alt_regno;
2400 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2401 insn = emit_move_insn (ar_unat_save_reg, reg);
2402 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2404 /* Even if we're not going to generate an epilogue, we still
2405 need to save the register so that EH works. */
2406 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2407 emit_insn (gen_prologue_use (ar_unat_save_reg));
2410 ar_unat_save_reg = NULL_RTX;
2412 /* Spill all varargs registers. Do this before spilling any GR registers,
2413 since we want the UNAT bits for the GR registers to override the UNAT
2414 bits from varargs, which we don't care about. */
2417 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2419 reg = gen_rtx_REG (DImode, regno);
2420 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2423 /* Locate the bottom of the register save area. */
2424 cfa_off = (current_frame_info.spill_cfa_off
2425 + current_frame_info.spill_size
2426 + current_frame_info.extra_spill_size);
2428 /* Save the predicate register block either in a register or in memory. */
2429 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2431 reg = gen_rtx_REG (DImode, PR_REG (0));
2432 if (current_frame_info.reg_save_pr != 0)
2434 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2435 insn = emit_move_insn (alt_reg, reg);
2437 /* ??? Denote pr spill/fill by a DImode move that modifies all
2438 64 hard registers. */
2439 RTX_FRAME_RELATED_P (insn) = 1;
2440 REG_NOTES (insn)
2441 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2442 gen_rtx_SET (VOIDmode, alt_reg, reg),
2445 /* Even if we're not going to generate an epilogue, we still
2446 need to save the register so that EH works. */
2448 emit_insn (gen_prologue_use (alt_reg));
2452 alt_regno = next_scratch_gr_reg ();
2453 alt_reg = gen_rtx_REG (DImode, alt_regno);
2454 insn = emit_move_insn (alt_reg, reg);
2455 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2460 /* Handle AR regs in numerical order. All of them get special handling. */
2461 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2462 && current_frame_info.reg_save_ar_unat == 0)
2464 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2465 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2469 /* The alloc insn already copied ar.pfs into a general register. The
2470 only thing we have to do now is copy that register to a stack slot
2471 if we'd not allocated a local register for the job. */
2472 if (current_frame_info.reg_save_ar_pfs == 0
2473 && ! current_function_is_leaf)
2475 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2476 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2480 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2482 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2483 if (current_frame_info.reg_save_ar_lc != 0)
2485 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2486 insn = emit_move_insn (alt_reg, reg);
2487 RTX_FRAME_RELATED_P (insn) = 1;
2489 /* Even if we're not going to generate an epilogue, we still
2490 need to save the register so that EH works. */
2492 emit_insn (gen_prologue_use (alt_reg));
2496 alt_regno = next_scratch_gr_reg ();
2497 alt_reg = gen_rtx_REG (DImode, alt_regno);
2498 emit_move_insn (alt_reg, reg);
2499 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2504 /* We should now be at the base of the gr/br/fr spill area. */
2505 if (cfa_off != (current_frame_info.spill_cfa_off
2506 + current_frame_info.spill_size))
2509 /* Spill all general registers. */
2510 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2511 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2513 reg = gen_rtx_REG (DImode, regno);
2514 do_spill (gen_gr_spill, reg, cfa_off, reg);
2518 /* Handle BR0 specially -- it may be getting stored permanently in
2519 some GR register. */
2520 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2522 reg = gen_rtx_REG (DImode, BR_REG (0));
2523 if (current_frame_info.reg_save_b0 != 0)
2525 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2526 insn = emit_move_insn (alt_reg, reg);
2527 RTX_FRAME_RELATED_P (insn) = 1;
2529 /* Even if we're not going to generate an epilogue, we still
2530 need to save the register so that EH works. */
2532 emit_insn (gen_prologue_use (alt_reg));
2536 alt_regno = next_scratch_gr_reg ();
2537 alt_reg = gen_rtx_REG (DImode, alt_regno);
2538 emit_move_insn (alt_reg, reg);
2539 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2544 /* Spill the rest of the BR registers. */
2545 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2546 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2548 alt_regno = next_scratch_gr_reg ();
2549 alt_reg = gen_rtx_REG (DImode, alt_regno);
2550 reg = gen_rtx_REG (DImode, regno);
2551 emit_move_insn (alt_reg, reg);
2552 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2556 /* Align the frame and spill all FR registers. */
2557 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2558 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2562 reg = gen_rtx_REG (TFmode, regno);
2563 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2567 if (cfa_off != current_frame_info.spill_cfa_off)
2570 finish_spill_pointers ();
2573 /* Called after register allocation to add any instructions needed for the
2574 epilogue. Using an epilogue insn is favored compared to putting all of the
2575 instructions in output_function_epilogue(), since it allows the scheduler
2576 to intermix instructions with the saves of the caller saved registers. In
2577 some cases, it might be necessary to emit a barrier instruction as the last
2578 insn to prevent such scheduling. */
2581 ia64_expand_epilogue (sibcall_p)
2584 rtx insn, reg, alt_reg, ar_unat_save_reg;
2585 int regno, alt_regno, cfa_off;
2587 ia64_compute_frame_size (get_frame_size ());
2589 /* If there is a frame pointer, then we use it instead of the stack
2590 pointer, so that the stack pointer does not need to be valid when
2591 the epilogue starts. See EXIT_IGNORE_STACK. */
2592 if (frame_pointer_needed)
2593 setup_spill_pointers (current_frame_info.n_spilled,
2594 hard_frame_pointer_rtx, 0);
2596 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2597 current_frame_info.total_size);
2599 if (current_frame_info.total_size != 0)
2601 /* ??? At this point we must generate a magic insn that appears to
2602 modify the spill iterators and the frame pointer. This would
2603 allow the most scheduling freedom. For now, just hard stop. */
2604 emit_insn (gen_blockage ());
2607 /* Locate the bottom of the register save area. */
2608 cfa_off = (current_frame_info.spill_cfa_off
2609 + current_frame_info.spill_size
2610 + current_frame_info.extra_spill_size);
2612 /* Restore the predicate registers. */
2613 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2615 if (current_frame_info.reg_save_pr != 0)
2616 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2619 alt_regno = next_scratch_gr_reg ();
2620 alt_reg = gen_rtx_REG (DImode, alt_regno);
2621 do_restore (gen_movdi_x, alt_reg, cfa_off);
2624 reg = gen_rtx_REG (DImode, PR_REG (0));
2625 emit_move_insn (reg, alt_reg);
2628 /* Restore the application registers. */
2630 /* Load the saved unat from the stack, but do not restore it until
2631 after the GRs have been restored. */
2632 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2634 if (current_frame_info.reg_save_ar_unat != 0)
2635 ar_unat_save_reg
2636 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2639 alt_regno = next_scratch_gr_reg ();
2640 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2641 current_frame_info.gr_used_mask |= 1 << alt_regno;
2642 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2647 ar_unat_save_reg = NULL_RTX;
2649 if (current_frame_info.reg_save_ar_pfs != 0)
2651 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2652 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2653 emit_move_insn (reg, alt_reg);
2655 else if (! current_function_is_leaf)
2657 alt_regno = next_scratch_gr_reg ();
2658 alt_reg = gen_rtx_REG (DImode, alt_regno);
2659 do_restore (gen_movdi_x, alt_reg, cfa_off);
2661 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2662 emit_move_insn (reg, alt_reg);
2665 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2667 if (current_frame_info.reg_save_ar_lc != 0)
2668 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2671 alt_regno = next_scratch_gr_reg ();
2672 alt_reg = gen_rtx_REG (DImode, alt_regno);
2673 do_restore (gen_movdi_x, alt_reg, cfa_off);
2676 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2677 emit_move_insn (reg, alt_reg);
2680 /* We should now be at the base of the gr/br/fr spill area. */
2681 if (cfa_off != (current_frame_info.spill_cfa_off
2682 + current_frame_info.spill_size))
2685 /* Restore all general registers. */
2686 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2687 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2689 reg = gen_rtx_REG (DImode, regno);
2690 do_restore (gen_gr_restore, reg, cfa_off);
2694 /* Restore the branch registers. Handle B0 specially, as it may
2695 have gotten stored in some GR register. */
2696 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2698 if (current_frame_info.reg_save_b0 != 0)
2699 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2702 alt_regno = next_scratch_gr_reg ();
2703 alt_reg = gen_rtx_REG (DImode, alt_regno);
2704 do_restore (gen_movdi_x, alt_reg, cfa_off);
2707 reg = gen_rtx_REG (DImode, BR_REG (0));
2708 emit_move_insn (reg, alt_reg);
2711 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2712 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2714 alt_regno = next_scratch_gr_reg ();
2715 alt_reg = gen_rtx_REG (DImode, alt_regno);
2716 do_restore (gen_movdi_x, alt_reg, cfa_off);
2718 reg = gen_rtx_REG (DImode, regno);
2719 emit_move_insn (reg, alt_reg);
2722 /* Restore floating point registers. */
2723 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2724 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2728 reg = gen_rtx_REG (TFmode, regno);
2729 do_restore (gen_fr_restore_x, reg, cfa_off);
2733 /* Restore ar.unat for real. */
2734 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2736 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2737 emit_move_insn (reg, ar_unat_save_reg);
2740 if (cfa_off != current_frame_info.spill_cfa_off)
2743 finish_spill_pointers ();
2745 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2747 /* ??? At this point we must generate a magic insn that appears to
2748 modify the spill iterators, the stack pointer, and the frame
2749 pointer. This would allow the most scheduling freedom. For now,
2751 emit_insn (gen_blockage ());
2754 if (cfun->machine->ia64_eh_epilogue_sp)
2755 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2756 else if (frame_pointer_needed)
2758 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2759 RTX_FRAME_RELATED_P (insn) = 1;
2761 else if (current_frame_info.total_size)
2763 rtx offset, frame_size_rtx;
2765 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2766 if (CONST_OK_FOR_I (current_frame_info.total_size))
2767 offset = frame_size_rtx;
2770 regno = next_scratch_gr_reg ();
2771 offset = gen_rtx_REG (DImode, regno);
2772 emit_move_insn (offset, frame_size_rtx);
2775 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2778 RTX_FRAME_RELATED_P (insn) = 1;
2779 if (GET_CODE (offset) != CONST_INT)
2781 REG_NOTES (insn)
2782 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2783 gen_rtx_SET (VOIDmode,
2785 gen_rtx_PLUS (DImode,
2792 if (cfun->machine->ia64_eh_epilogue_bsp)
2793 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2796 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2799 int fp = GR_REG (2);
2800 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2801 first available call clobbered register. If there was a frame_pointer
2802 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2803 so we have to make sure we're using the string "r2" when emitting
2804 the register name for the assembler. */
2805 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2806 fp = HARD_FRAME_POINTER_REGNUM;
2808 /* We must emit an alloc to force the input registers to become output
2809 registers. Otherwise, if the callee tries to pass its parameters
2810 through to another call without an intervening alloc, then these values are lost. */
2812 /* ??? We don't need to preserve all input registers. We only need to
2813 preserve those input registers used as arguments to the sibling call.
2814 It is unclear how to compute that number here. */
2815 if (current_frame_info.n_input_regs != 0)
2816 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2817 GEN_INT (0), GEN_INT (0),
2818 GEN_INT (current_frame_info.n_input_regs),
2823 /* Return 1 if br.ret can do all the work required to return from a function. */
2827 ia64_direct_return ()
2829 if (reload_completed && ! frame_pointer_needed)
2831 ia64_compute_frame_size (get_frame_size ());
2833 return (current_frame_info.total_size == 0
2834 && current_frame_info.n_spilled == 0
2835 && current_frame_info.reg_save_b0 == 0
2836 && current_frame_info.reg_save_pr == 0
2837 && current_frame_info.reg_save_ar_pfs == 0
2838 && current_frame_info.reg_save_ar_unat == 0
2839 && current_frame_info.reg_save_ar_lc == 0);
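/* Editor's illustration (hypothetical function, not from the original
   source): a simple leaf such as

       long add (long a, long b) { return a + b; }

   allocates no frame, spills nothing, and saves none of b0/pr/ar.*,
   so the conditions above hold and the function returns with a bare
   br.ret. */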
2845 ia64_hard_regno_rename_ok (from, to)
2849 /* Don't clobber any of the registers we reserved for the prologue. */
2850 if (to == current_frame_info.reg_fp
2851 || to == current_frame_info.reg_save_b0
2852 || to == current_frame_info.reg_save_pr
2853 || to == current_frame_info.reg_save_ar_pfs
2854 || to == current_frame_info.reg_save_ar_unat
2855 || to == current_frame_info.reg_save_ar_lc)
2858 if (from == current_frame_info.reg_fp
2859 || from == current_frame_info.reg_save_b0
2860 || from == current_frame_info.reg_save_pr
2861 || from == current_frame_info.reg_save_ar_pfs
2862 || from == current_frame_info.reg_save_ar_unat
2863 || from == current_frame_info.reg_save_ar_lc)
2866 /* Don't use output registers outside the register frame. */
2867 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2870 /* Retain even/oddness on predicate register pairs. */
2871 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2872 return (from & 1) == (to & 1);
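/* Editor's note: the parity test above permits renaming p6 to p8
   (both even) but rejects p6 to p7, preserving the even/odd pairing
   that the complementary-predicate assumptions elsewhere in this
   file rely on. */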
2874 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2875 if (from == GR_REG (4) && current_function_calls_setjmp)
2881 /* Target hook for assembling integer objects. Handle word-sized
2882 aligned objects and detect the cases when @fptr is needed. */
2885 ia64_assemble_integer (x, size, aligned_p)
2890 if (size == (TARGET_ILP32 ? 4 : 8)
2892 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2893 && GET_CODE (x) == SYMBOL_REF
2894 && SYMBOL_REF_FLAG (x))
2896 if (TARGET_ILP32)
2897 fputs ("\tdata4\t@fptr(", asm_out_file);
2898 else
2899 fputs ("\tdata8\t@fptr(", asm_out_file);
2900 output_addr_const (asm_out_file, x);
2901 fputs (")\n", asm_out_file);
2904 return default_assemble_integer (x, size, aligned_p);
2907 /* Emit the function prologue. */
2910 ia64_output_function_prologue (file, size)
2912 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2914 int mask, grsave, grsave_prev;
2916 if (current_frame_info.need_regstk)
2917 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2918 current_frame_info.n_input_regs,
2919 current_frame_info.n_local_regs,
2920 current_frame_info.n_output_regs,
2921 current_frame_info.n_rotate_regs);
2923 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2926 /* Emit the .prologue directive. */
2929 grsave = grsave_prev = 0;
2930 if (current_frame_info.reg_save_b0 != 0)
2933 grsave = grsave_prev = current_frame_info.reg_save_b0;
2935 if (current_frame_info.reg_save_ar_pfs != 0
2936 && (grsave_prev == 0
2937 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2940 if (grsave_prev == 0)
2941 grsave = current_frame_info.reg_save_ar_pfs;
2942 grsave_prev = current_frame_info.reg_save_ar_pfs;
2944 if (current_frame_info.reg_fp != 0
2945 && (grsave_prev == 0
2946 || current_frame_info.reg_fp == grsave_prev + 1))
2949 if (grsave_prev == 0)
2950 grsave = HARD_FRAME_POINTER_REGNUM;
2951 grsave_prev = current_frame_info.reg_fp;
2953 if (current_frame_info.reg_save_pr != 0
2954 && (grsave_prev == 0
2955 || current_frame_info.reg_save_pr == grsave_prev + 1))
2958 if (grsave_prev == 0)
2959 grsave = current_frame_info.reg_save_pr;
2963 fprintf (file, "\t.prologue %d, %d\n", mask,
2964 ia64_dbx_register_number (grsave));
2966 fputs ("\t.prologue\n", file);
2968 /* Emit a .spill directive, if necessary, to relocate the base of
2969 the register spill area. */
2970 if (current_frame_info.spill_cfa_off != -16)
2971 fprintf (file, "\t.spill %ld\n",
2972 (long) (current_frame_info.spill_cfa_off
2973 + current_frame_info.spill_size));
2976 /* Emit the .body directive at the scheduled end of the prologue. */
2979 ia64_output_function_end_prologue (file)
2982 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2985 fputs ("\t.body\n", file);
2988 /* Emit the function epilogue. */
2991 ia64_output_function_epilogue (file, size)
2992 FILE *file ATTRIBUTE_UNUSED;
2993 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2997 /* Reset from the function's potential modifications. */
2998 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
3000 if (current_frame_info.reg_fp)
3002 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3003 reg_names[HARD_FRAME_POINTER_REGNUM]
3004 = reg_names[current_frame_info.reg_fp];
3005 reg_names[current_frame_info.reg_fp] = tmp;
3007 if (! TARGET_REG_NAMES)
3009 for (i = 0; i < current_frame_info.n_input_regs; i++)
3010 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3011 for (i = 0; i < current_frame_info.n_local_regs; i++)
3012 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3013 for (i = 0; i < current_frame_info.n_output_regs; i++)
3014 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3017 current_frame_info.initialized = 0;
3021 ia64_dbx_register_number (regno)
3024 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3025 from its home at loc79 to something inside the register frame. We
3026 must perform the same renumbering here for the debug info. */
3027 if (current_frame_info.reg_fp)
3029 if (regno == HARD_FRAME_POINTER_REGNUM)
3030 regno = current_frame_info.reg_fp;
3031 else if (regno == current_frame_info.reg_fp)
3032 regno = HARD_FRAME_POINTER_REGNUM;
3035 if (IN_REGNO_P (regno))
3036 return 32 + regno - IN_REG (0);
3037 else if (LOC_REGNO_P (regno))
3038 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3039 else if (OUT_REGNO_P (regno))
3040 return (32 + current_frame_info.n_input_regs
3041 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3047 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3048 rtx addr, fnaddr, static_chain;
3050 rtx addr_reg, eight = GEN_INT (8);
3052 /* Load up our iterator. */
3053 addr_reg = gen_reg_rtx (Pmode);
3054 emit_move_insn (addr_reg, addr);
3056 /* The first two words are the fake descriptor:
3057 __ia64_trampoline, ADDR+16. */
3058 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3059 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3060 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3062 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3063 copy_to_reg (plus_constant (addr, 16)));
3064 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3066 /* The third word is the target descriptor. */
3067 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3068 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3070 /* The fourth word is the static chain. */
3071 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
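/* Editor's summary of the four stores above -- the finished
   trampoline looks like:

       ADDR+ 0: __ia64_trampoline   \  fake function descriptor
       ADDR+ 8: ADDR+16             /
       ADDR+16: FNADDR                 target descriptor word
       ADDR+24: STATIC_CHAIN  */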
3074 /* Do any needed setup for a variadic function. CUM has not been updated
3075 for the last named argument which has type TYPE and mode MODE.
3077 We generate the actual spill instructions during prologue generation. */
3080 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3081 CUMULATIVE_ARGS cum;
3085 int second_time ATTRIBUTE_UNUSED;
3087 /* Skip the current argument. */
3088 ia64_function_arg_advance (&cum, int_mode, type, 1);
3090 if (cum.words < MAX_ARGUMENT_SLOTS)
3092 int n = MAX_ARGUMENT_SLOTS - cum.words;
3093 *pretend_size = n * UNITS_PER_WORD;
3094 cfun->machine->n_varargs = n;
3098 /* Check whether TYPE is a homogeneous floating point aggregate. If
3099 it is, return the mode of the floating point type that appears
3100 in all leaves. If it is not, return VOIDmode.
3102 An aggregate is a homogeneous floating point aggregate if all
3103 fields/elements in it have the same floating point type (e.g.,
3104 SFmode). 128-bit quad-precision floats are excluded. */
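/* Editor's illustration (hypothetical types, not from the original
   source): by this definition the first two types below are HFAs,
   because every leaf field has the same floating point mode, while
   the third is not. */
#if 0
struct hfa_sf  { float x, y, z; };      /* HFA, element mode SFmode */
struct hfa_df  { double re, im; };      /* HFA, element mode DFmode */
struct not_hfa { float x; double y; };  /* mixed modes: not an HFA */
#endif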
3106 static enum machine_mode
3107 hfa_element_mode (type, nested)
3111 enum machine_mode element_mode = VOIDmode;
3112 enum machine_mode mode;
3113 enum tree_code code = TREE_CODE (type);
3114 int know_element_mode = 0;
3119 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3120 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3121 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3122 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3126 /* Fortran complex types are supposed to be HFAs, so we need to handle
3127 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3130 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3131 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3132 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3133 * BITS_PER_UNIT, MODE_FLOAT, 0);
3138 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3139 mode if this is contained within an aggregate. */
3140 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3141 return TYPE_MODE (type);
3146 return hfa_element_mode (TREE_TYPE (type), 1);
3150 case QUAL_UNION_TYPE:
3151 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3153 if (TREE_CODE (t) != FIELD_DECL)
3156 mode = hfa_element_mode (TREE_TYPE (t), 1);
3157 if (know_element_mode)
3159 if (mode != element_mode)
3162 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3166 know_element_mode = 1;
3167 element_mode = mode;
3170 return element_mode;
3173 /* If we reach here, we probably have some front-end specific type
3174 that the backend doesn't know about. This can happen via the
3175 aggregate_value_p call in init_function_start. All we can do is
3176 ignore unknown tree types. */
3183 /* Return rtx for register where argument is passed, or zero if it is passed
3186 /* ??? 128-bit quad-precision floats are always passed in general
3190 ia64_function_arg (cum, mode, type, named, incoming)
3191 CUMULATIVE_ARGS *cum;
3192 enum machine_mode mode;
3197 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3198 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3199 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3202 enum machine_mode hfa_mode = VOIDmode;
3204 /* Integer and float arguments larger than 8 bytes start at the next even
3205 boundary. Aggregates larger than 8 bytes start at the next even boundary
3206 if the aggregate has 16 byte alignment. Net effect is that types with
3207 alignment greater than 8 start at the next even boundary. */
3208 /* ??? The ABI does not specify how to handle aggregates with alignment from
3209 9 to 15 bytes, or greater than 16. We handle them all as if they had
3210 16 byte alignment. Such aggregates can occur only if gcc extensions are used. */
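/* Editor's illustration (hypothetical type): an aggregate such as

       struct s { long a, b; } __attribute__ ((aligned (16)));

   has alignment greater than 8, so when passed by value it starts at
   the next even argument slot, possibly leaving one slot unused. */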
3212 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3214 && (cum->words & 1))
3217 /* If all argument slots are used, then it must go on the stack. */
3218 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3221 /* Check for and handle homogeneous FP aggregates. */
3223 hfa_mode = hfa_element_mode (type, 0);
3225 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3226 and unprototyped hfas are passed specially. */
3227 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3231 int fp_regs = cum->fp_regs;
3232 int int_regs = cum->words + offset;
3233 int hfa_size = GET_MODE_SIZE (hfa_mode);
3237 /* If prototyped, pass it in FR regs then GR regs.
3238 If not prototyped, pass it in both FR and GR regs.
3240 If this is an SFmode aggregate, then it is possible to run out of
3241 FR regs while GR regs are still left. In that case, we pass the
3242 remaining part in the GR regs. */
3244 /* Fill the FP regs. We do this always. We stop if we reach the end
3245 of the argument, the last FP register, or the last argument slot. */
3247 byte_size = ((mode == BLKmode)
3248 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3249 args_byte_size = int_regs * UNITS_PER_WORD;
3251 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3252 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3254 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3255 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3259 args_byte_size += hfa_size;
3263 /* If no prototype, then the whole thing must go in GR regs. */
3264 if (! cum->prototype)
3266 /* If this is an SFmode aggregate, then we might have some left over
3267 that needs to go in GR regs. */
3268 else if (byte_size != offset)
3269 int_regs += offset / UNITS_PER_WORD;
3271 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3273 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3275 enum machine_mode gr_mode = DImode;
3277 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3278 then this goes in a GR reg left adjusted/little endian, right
3279 adjusted/big endian. */
3280 /* ??? Currently this is handled wrong, because 4-byte hunks are
3281 always right adjusted/little endian. */
3284 /* If we have an even 4 byte hunk because the aggregate is a
3285 multiple of 4 bytes in size, then this goes in a GR reg right
3286 adjusted/little endian. */
3287 else if (byte_size - offset == 4)
3289 /* Complex floats need to have float mode. */
3290 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3293 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3294 gen_rtx_REG (gr_mode, (basereg
3297 offset += GET_MODE_SIZE (gr_mode);
3298 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3299 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3302 /* If we ended up using just one location, just return that one loc. */
3304 return XEXP (loc[0], 0);
3306 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3309 /* Integral and aggregates go in general registers. If we have run out of
3310 FR registers, then FP values must also go in general registers. This can
3311 happen when we have a SFmode HFA. */
3312 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3313 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3315 int byte_size = ((mode == BLKmode)
3316 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3317 if (BYTES_BIG_ENDIAN
3318 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3319 && byte_size < UNITS_PER_WORD
3322 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3323 gen_rtx_REG (DImode,
3324 (basereg + cum->words
3327 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3330 return gen_rtx_REG (mode, basereg + cum->words + offset);
3334 /* If there is a prototype, then FP values go in a FR register when
3335 named, and in a GR register when unnamed. */
3336 else if (cum->prototype)
3339 return gen_rtx_REG (mode, basereg + cum->words + offset);
3341 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3343 /* If there is no prototype, then FP values go in both FR and GR
3347 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3348 gen_rtx_REG (mode, (FR_ARG_FIRST
3351 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3353 (basereg + cum->words
3357 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3361 /* Return number of words, at the beginning of the argument, that must be
3362 put in registers. 0 if the argument is entirely in registers or entirely in memory. */
3366 ia64_function_arg_partial_nregs (cum, mode, type, named)
3367 CUMULATIVE_ARGS *cum;
3368 enum machine_mode mode;
3370 int named ATTRIBUTE_UNUSED;
3372 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3373 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3377 /* Arguments with alignment larger than 8 bytes start at the next even
3379 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3381 && (cum->words & 1))
3384 /* If all argument slots are used, then it must go on the stack. */
3385 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3388 /* It doesn't matter whether the argument goes in FR or GR regs. If
3389 it fits within the 8 argument slots, then it goes entirely in
3390 registers. If it extends past the last argument slot, then the rest
3391 goes on the stack. */
3393 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3396 return MAX_ARGUMENT_SLOTS - cum->words - offset;
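/* Editor's worked example: with cum->words == 6, offset == 0, and a
   4-word argument, words + cum->words + offset == 10 exceeds
   MAX_ARGUMENT_SLOTS (8), so 8 - 6 - 0 == 2 words travel in registers
   and the remaining 2 words go on the stack. */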
3399 /* Update CUM to point after this argument. This is patterned after
3400 ia64_function_arg. */
3403 ia64_function_arg_advance (cum, mode, type, named)
3404 CUMULATIVE_ARGS *cum;
3405 enum machine_mode mode;
3409 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3410 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3413 enum machine_mode hfa_mode = VOIDmode;
3415 /* If all arg slots are already full, then there is nothing to do. */
3416 if (cum->words >= MAX_ARGUMENT_SLOTS)
3419 /* Arguments with alignment larger than 8 bytes start at the next even
3421 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3423 && (cum->words & 1))
3426 cum->words += words + offset;
3428 /* Check for and handle homogeneous FP aggregates. */
3430 hfa_mode = hfa_element_mode (type, 0);
3432 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3433 and unprototyped hfas are passed specially. */
3434 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3436 int fp_regs = cum->fp_regs;
3437 /* This is the original value of cum->words + offset. */
3438 int int_regs = cum->words - words;
3439 int hfa_size = GET_MODE_SIZE (hfa_mode);
3443 /* If prototyped, pass it in FR regs then GR regs.
3444 If not prototyped, pass it in both FR and GR regs.
3446 If this is an SFmode aggregate, then it is possible to run out of
3447 FR regs while GR regs are still left. In that case, we pass the
3448 remaining part in the GR regs. */
3450 /* Fill the FP regs. We do this always. We stop if we reach the end
3451 of the argument, the last FP register, or the last argument slot. */
3453 byte_size = ((mode == BLKmode)
3454 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3455 args_byte_size = int_regs * UNITS_PER_WORD;
3457 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3458 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3461 args_byte_size += hfa_size;
3465 cum->fp_regs = fp_regs;
3468 /* Integral and aggregates go in general registers. If we have run out of
3469 FR registers, then FP values must also go in general registers. This can
3470 happen when we have a SFmode HFA. */
3471 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3472 cum->int_regs = cum->words;
3474 /* If there is a prototype, then FP values go in a FR register when
3475 named, and in a GR register when unnamed. */
3476 else if (cum->prototype)
3479 cum->int_regs = cum->words;
3481 /* ??? Complex types should not reach here. */
3482 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3484 /* If there is no prototype, then FP values go in both FR and GR
3488 /* ??? Complex types should not reach here. */
3489 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3490 cum->int_regs = cum->words;
3494 /* Variable sized types are passed by reference. */
3495 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3498 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3499 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 int named ATTRIBUTE_UNUSED;
3504 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3508 /* Implement va_arg. */
3511 ia64_va_arg (valist, type)
3516 /* Variable sized types are passed by reference. */
3517 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3519 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3520 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3523 /* Arguments with alignment larger than 8 bytes start at the next even
3525 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3527 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3528 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3529 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3530 build_int_2 (-2 * UNITS_PER_WORD, -1));
3531 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3532 TREE_SIDE_EFFECTS (t) = 1;
3533 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
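/* Editor's note: with UNITS_PER_WORD == 8 the statements above compute
   valist = (valist + 15) & -16, rounding the argument pointer up to
   the next 16 byte boundary before the value is fetched. */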
3536 return std_expand_builtin_va_arg (valist, type);
3539 /* Return 1 if function return value returned in memory. Return 0 if it is
3543 ia64_return_in_memory (valtype)
3546 enum machine_mode mode;
3547 enum machine_mode hfa_mode;
3548 HOST_WIDE_INT byte_size;
3550 mode = TYPE_MODE (valtype);
3551 byte_size = GET_MODE_SIZE (mode);
3552 if (mode == BLKmode)
3554 byte_size = int_size_in_bytes (valtype);
3559 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
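/* Editor's illustration (hypothetical types): struct { double d[4]; }
   has 32/8 == 4 elements and is returned in FP registers, while
   struct { float f[10]; } has 40/4 == 10 > 8 elements and is
   returned in memory. */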
3561 hfa_mode = hfa_element_mode (valtype, 0);
3562 if (hfa_mode != VOIDmode)
3564 int hfa_size = GET_MODE_SIZE (hfa_mode);
3566 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3571 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3577 /* Return rtx for register that holds the function return value. */
3580 ia64_function_value (valtype, func)
3582 tree func ATTRIBUTE_UNUSED;
3584 enum machine_mode mode;
3585 enum machine_mode hfa_mode;
3587 mode = TYPE_MODE (valtype);
3588 hfa_mode = hfa_element_mode (valtype, 0);
3590 if (hfa_mode != VOIDmode)
3598 hfa_size = GET_MODE_SIZE (hfa_mode);
3599 byte_size = ((mode == BLKmode)
3600 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3602 for (i = 0; offset < byte_size; i++)
3604 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3605 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3611 return XEXP (loc[0], 0);
3613 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3615 else if (FLOAT_TYPE_P (valtype) &&
3616 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3617 return gen_rtx_REG (mode, FR_ARG_FIRST);
3620 if (BYTES_BIG_ENDIAN
3621 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3629 bytesize = int_size_in_bytes (valtype);
3630 for (i = 0; offset < bytesize; i++)
3632 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3633 gen_rtx_REG (DImode,
3636 offset += UNITS_PER_WORD;
3638 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3641 return gen_rtx_REG (mode, GR_RET_FIRST);
3645 /* Print a memory address as an operand to reference that memory location. */
3647 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3648 also call this from ia64_print_operand for memory addresses. */
3651 ia64_print_operand_address (stream, address)
3652 FILE * stream ATTRIBUTE_UNUSED;
3653 rtx address ATTRIBUTE_UNUSED;
3657 /* Print an operand to an assembler instruction.
3658 C Swap and print a comparison operator.
3659 D Print an FP comparison operator.
3660 E Print 32 - constant, for SImode shifts as extract.
3661 e Print 64 - constant, for DImode rotates.
3662 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3663 a floating point register emitted normally.
3664 I Invert a predicate register by adding 1.
3665 J Select the proper predicate register for a condition.
3666 j Select the inverse predicate register for a condition.
3667 O Append .acq for volatile load.
3668 P Postincrement of a MEM.
3669 Q Append .rel for volatile store.
3670 S Shift amount for shladd instruction.
3671 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3672 for Intel assembler.
3673 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3674 for Intel assembler.
3675 r Print register name, or constant 0 as r0. HP compatibility for Linux kernel. */
3678 ia64_print_operand (file, x, code)
3688 /* Handled below. */
3693 enum rtx_code c = swap_condition (GET_CODE (x));
3694 fputs (GET_RTX_NAME (c), file);
3699 switch (GET_CODE (x))
3711 str = GET_RTX_NAME (GET_CODE (x));
3718 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3722 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3726 if (x == CONST0_RTX (GET_MODE (x)))
3727 str = reg_names [FR_REG (0)];
3728 else if (x == CONST1_RTX (GET_MODE (x)))
3729 str = reg_names [FR_REG (1)];
3730 else if (GET_CODE (x) == REG)
3731 str = reg_names [REGNO (x)];
3738 fputs (reg_names [REGNO (x) + 1], file);
3744 unsigned int regno = REGNO (XEXP (x, 0));
3745 if (GET_CODE (x) == EQ)
3749 fputs (reg_names [regno], file);
3754 if (MEM_VOLATILE_P (x))
3755 fputs(".acq", file);
3760 HOST_WIDE_INT value;
3762 switch (GET_CODE (XEXP (x, 0)))
3768 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3769 if (GET_CODE (x) == CONST_INT)
3771 else if (GET_CODE (x) == REG)
3773 fprintf (file, ", %s", reg_names[REGNO (x)]);
3781 value = GET_MODE_SIZE (GET_MODE (x));
3785 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3791 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3796 if (MEM_VOLATILE_P (x))
3797 fputs(".rel", file);
3801 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3805 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3807 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3813 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3815 const char *prefix = "0x";
3816 if (INTVAL (x) & 0x80000000)
3818 fprintf (file, "0xffffffff");
3821 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3827 /* If this operand is the constant zero, write it as register zero.
3828 Any register, zero, or CONST_INT value is OK here. */
3829 if (GET_CODE (x) == REG)
3830 fputs (reg_names[REGNO (x)], file);
3831 else if (x == CONST0_RTX (GET_MODE (x)))
3833 else if (GET_CODE (x) == CONST_INT)
3834 output_addr_const (file, x);
3836 output_operand_lossage ("invalid %%r value");
3843 /* For conditional branches, returns or calls, substitute
3844 sptk, dptk, dpnt, or spnt for %s. */
3845 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3848 int pred_val = INTVAL (XEXP (x, 0));
3850 /* Guess top and bottom 10% statically predicted. */
3851 if (pred_val < REG_BR_PROB_BASE / 50)
3853 else if (pred_val < REG_BR_PROB_BASE / 2)
3855 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3860 else if (GET_CODE (current_output_insn) == CALL_INSN)
3865 fputs (which, file);
3870 x = current_insn_predicate;
3873 unsigned int regno = REGNO (XEXP (x, 0));
3874 if (GET_CODE (x) == EQ)
3876 fprintf (file, "(%s) ", reg_names [regno]);
3881 output_operand_lossage ("ia64_print_operand: unknown code");
3885 switch (GET_CODE (x))
3887 /* This happens for the spill/restore instructions. */
3892 /* ... fall through ... */
3895 fputs (reg_names [REGNO (x)], file);
3900 rtx addr = XEXP (x, 0);
3901 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3902 addr = XEXP (addr, 0);
3903 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3908 output_addr_const (file, x);
3915 /* Calculate the cost of moving data from a register in class FROM to
3916 one in class TO, using MODE. */
3919 ia64_register_move_cost (mode, from, to)
3920 enum machine_mode mode;
3921 enum reg_class from, to;
3923 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3924 if (to == ADDL_REGS)
3926 if (from == ADDL_REGS)
3929 /* All costs are symmetric, so reduce cases by putting the
3930 lower number class as the destination. */
3933 enum reg_class tmp = to;
3934 to = from, from = tmp;
3937 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3938 so that we get secondary memory reloads. Between FR_REGS,
3939 we have to make this at least as expensive as MEMORY_MOVE_COST
3940 to avoid spectacularly poor register class preferencing. */
3943 if (to != GR_REGS || from != GR_REGS)
3944 return MEMORY_MOVE_COST (mode, to, 0);
3952 /* Moving between PR registers takes two insns. */
3953 if (from == PR_REGS)
3955 /* Moving between PR and anything but GR is impossible. */
3956 if (from != GR_REGS)
3957 return MEMORY_MOVE_COST (mode, to, 0);
3961 /* Moving between BR and anything but GR is impossible. */
3962 if (from != GR_REGS && from != GR_AND_BR_REGS)
3963 return MEMORY_MOVE_COST (mode, to, 0);
3968 /* Moving between AR and anything but GR is impossible. */
3969 if (from != GR_REGS)
3970 return MEMORY_MOVE_COST (mode, to, 0);
3975 case GR_AND_FR_REGS:
3976 case GR_AND_BR_REGS:
3987 /* This function returns the register class required for a secondary
3988 register when copying between one of the registers in CLASS, and X,
3989 using MODE. A return value of NO_REGS means that no secondary register
3993 ia64_secondary_reload_class (class, mode, x)
3994 enum reg_class class;
3995 enum machine_mode mode ATTRIBUTE_UNUSED;
4000 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4001 regno = true_regnum (x);
4008 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4009 interaction. We end up with two pseudos with overlapping lifetimes
4010 both of which are equiv to the same constant, and both which need
4011 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4012 changes depending on the path length, which means the qty_first_reg
4013 check in make_regs_eqv can give different answers at different times.
4014 At some point I'll probably need a reload_indi pattern to handle this.
4017 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4018 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4019 non-general registers for good measure. */
4020 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4023 /* This is needed if a pseudo used as a call_operand gets spilled to a stack slot. */
4025 if (GET_CODE (x) == MEM)
4030 /* Need to go through general registers to get to other class regs. */
4031 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4034 /* This can happen when a paradoxical subreg is an operand to the muldi3 pattern. */
4036 /* ??? This shouldn't be necessary after instruction scheduling is
4037 enabled, because paradoxical subregs are not accepted by
4038 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4039 stop the paradoxical subreg stupidity in the *_operand functions in recog.c. */
4041 if (GET_CODE (x) == MEM
4042 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4043 || GET_MODE (x) == QImode))
4046 /* This can happen because of the ior/and/etc patterns that accept FP
4047 registers as operands. If the third operand is a constant, then it
4048 needs to be reloaded into a FP register. */
4049 if (GET_CODE (x) == CONST_INT)
4052 /* This can happen because of register elimination in a muldi3 insn.
4053 E.g. `26107 * (unsigned long)&u'. */
4054 if (GET_CODE (x) == PLUS)
4059 /* ??? This happens if we cse/gcse a BImode value across a call,
4060 and the function has a nonlocal goto. This is because global
4061 does not allocate call crossing pseudos to hard registers when
4062 current_function_has_nonlocal_goto is true. This is relatively
4063 common for C++ programs that use exceptions. To reproduce,
4064 return NO_REGS and compile libstdc++. */
4065 if (GET_CODE (x) == MEM)
4068 /* This can happen when we take a BImode subreg of a DImode value,
4069 and that DImode value winds up in some non-GR register. */
4070 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4075 /* Since we have no offsettable memory addresses, we need a temporary
4076 to hold the address of the second word. */
4089 /* Emit text to declare externally defined variables and functions, because
4090 the Intel assembler does not support undefined externals. */
4093 ia64_asm_output_external (file, decl, name)
4098 int save_referenced;
4100 /* GNU as does not need anything here, but the HP linker does need
4101 something for external functions. */
4105 || TREE_CODE (decl) != FUNCTION_DECL
4106 || strstr(name, "__builtin_") == name))
4109 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4110 the linker when we do this, so we need to be careful not to do this for
4111 builtin functions which have no library equivalent. Unfortunately, we
4112 can't tell here whether or not a function will actually be called by
4113 expand_expr, so we pull in library functions even if we may not need them. */
4115 if (! strcmp (name, "__builtin_next_arg")
4116 || ! strcmp (name, "alloca")
4117 || ! strcmp (name, "__builtin_constant_p")
4118 || ! strcmp (name, "__builtin_args_info"))
4122 ia64_hpux_add_extern_decl (name);
4125 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */
4127 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4128 if (TREE_CODE (decl) == FUNCTION_DECL)
4129 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4130 (*targetm.asm_out.globalize_label) (file, name);
4131 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4135 /* Parse the -mfixed-range= option string. */
4138 fix_range (const_str)
4139 const char *const_str;
4142 char *str, *dash, *comma;
4144 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4145 REG2 are either register names or register numbers. The effect
4146 of this option is to mark the registers in the range from REG1 to
4147 REG2 as ``fixed'' so they won't be used by the compiler. This is
4148 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4150 i = strlen (const_str);
4151 str = (char *) alloca (i + 1);
4152 memcpy (str, const_str, i + 1);
4156 dash = strchr (str, '-');
4159 warning ("value of -mfixed-range must have form REG1-REG2");
4164 comma = strchr (dash + 1, ',');
4168 first = decode_reg_name (str);
4171 warning ("unknown register name: %s", str);
4175 last = decode_reg_name (dash + 1);
4178 warning ("unknown register name: %s", dash + 1);
4186 warning ("%s-%s is an empty range", str, dash + 1);
4190 for (i = first; i <= last; ++i)
4191 fixed_regs[i] = call_used_regs[i] = 1;
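/* Editor's usage sketch: ranges are comma separated, so e.g.

       -mfixed-range=f12-f15,f32-f127

   marks f12-f15 and f32-f127 as fixed and call-used, keeping the
   register allocator away from them. */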
4201 static struct machine_function *
4202 ia64_init_machine_status ()
4204 return ggc_alloc_cleared (sizeof (struct machine_function));
4207 /* Handle TARGET_OPTIONS switches. */
4210 ia64_override_options ()
4212 if (TARGET_AUTO_PIC)
4213 target_flags |= MASK_CONST_GP;
4215 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4217 warning ("cannot optimize floating point division for both latency and throughput");
4218 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4221 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4223 warning ("cannot optimize integer division for both latency and throughput");
4224 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4227 if (ia64_fixed_range_string)
4228 fix_range (ia64_fixed_range_string);
4230 if (ia64_tls_size_string)
4233 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4234 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4235 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4237 ia64_tls_size = tmp;
4240 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4241 flag_schedule_insns_after_reload = 0;
4243 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4245 init_machine_status = ia64_init_machine_status;
4247 /* Tell the compiler which flavor of TFmode we're using. */
4248 if (INTEL_EXTENDED_IEEE_FORMAT)
4249 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4252 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4253 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4254 static enum attr_type ia64_safe_type PARAMS((rtx));
4256 static enum attr_itanium_requires_unit0
4257 ia64_safe_itanium_requires_unit0 (insn)
4260 if (recog_memoized (insn) >= 0)
4261 return get_attr_itanium_requires_unit0 (insn);
4263 return ITANIUM_REQUIRES_UNIT0_NO;
4266 static enum attr_itanium_class
4267 ia64_safe_itanium_class (insn)
4270 if (recog_memoized (insn) >= 0)
4271 return get_attr_itanium_class (insn);
4273 return ITANIUM_CLASS_UNKNOWN;
4276 static enum attr_type
4277 ia64_safe_type (insn)
4280 if (recog_memoized (insn) >= 0)
4281 return get_attr_type (insn);
4283 return TYPE_UNKNOWN;
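/* Editor's note on the three wrappers above: recog_memoized returns a
   negative insn code for instructions with no recognized pattern
   (e.g. asms), so each wrapper falls back to a conservative default
   instead of calling the get_attr_* accessor on such insns. */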
4286 /* The following collection of routines emit instruction group stop bits as
4287 necessary to avoid dependencies. */
4289 /* Need to track some additional registers as far as serialization is
4290 concerned so we can properly handle br.call and br.ret. We could
4291 make these registers visible to gcc, but since these registers are
4292 never explicitly used in gcc generated code, it seems wasteful to
4293 do so (plus it would make the call and return patterns needlessly complex). */
4295 #define REG_GP (GR_REG (1))
4296 #define REG_RP (BR_REG (0))
4297 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4298 /* This is used for volatile asms which may require a stop bit immediately
4299 before and after them. */
4300 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4301 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4302 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4304 /* For each register, we keep track of how it has been written in the
4305 current instruction group.
4307 If a register is written unconditionally (no qualifying predicate),
4308 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4310 If a register is written if its qualifying predicate P is true, we
4311 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4312 may be written again by the complement of P (P^1) and when this happens,
4313 WRITE_COUNT gets set to 2.
4315 The result of this is that whenever an insn attempts to write a register
4316 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4318 If a predicate register is written by a floating-point insn, we set
4319 WRITTEN_BY_FP to true.
4321 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4322 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
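/* Editor's illustration in IA-64 assembly (hypothetical sequence):

       cmp.eq p6, p7 = r8, r9    // writes the complementary pair p6/p7
       (p6) mov r10 = 1          // WRITE_COUNT of r10: 0 -> 1
       (p7) mov r10 = 2          // complement of p6: 1 -> 2, no barrier

   Had the second write been under an unrelated predicate, or been
   unconditional, a stop bit would be required before it. */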
4324 struct reg_write_state
4326 unsigned int write_count : 2;
4327 unsigned int first_pred : 16;
4328 unsigned int written_by_fp : 1;
4329 unsigned int written_by_and : 1;
4330 unsigned int written_by_or : 1;
4333 /* Cumulative info for the current instruction group. */
4334 struct reg_write_state rws_sum[NUM_REGS];
4335 /* Info for the current instruction. This gets copied to rws_sum after a
4336 stop bit is emitted. */
4337 struct reg_write_state rws_insn[NUM_REGS];
4339 /* Indicates whether this is the first instruction after a stop bit,
4340 in which case we don't need another stop bit. Without this, we hit
4341 the abort in ia64_variable_issue when scheduling an alloc. */
4342 static int first_instruction;
4344 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4345 RTL for one instruction. */
4348 unsigned int is_write : 1; /* Is register being written? */
4349 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4350 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4351 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4352 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4353 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4356 static void rws_update PARAMS ((struct reg_write_state *, int,
4357 struct reg_flags, int));
4358 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4359 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4360 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4361 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4362 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4363 static void init_insn_group_barriers PARAMS ((void));
4364 static int group_barrier_needed_p PARAMS ((rtx));
4365 static int safe_group_barrier_needed_p PARAMS ((rtx));
4367 /* Update *RWS for REGNO, which is being written by the current instruction,
4368 with predicate PRED, and associated register flags in FLAGS. */
4371 rws_update (rws, regno, flags, pred)
4372 struct reg_write_state *rws;
4374 struct reg_flags flags;
4378 rws[regno].write_count++;
4380 rws[regno].write_count = 2;
4381 rws[regno].written_by_fp |= flags.is_fp;
4382 /* ??? Not tracking and/or across differing predicates. */
4383 rws[regno].written_by_and = flags.is_and;
4384 rws[regno].written_by_or = flags.is_or;
4385 rws[regno].first_pred = pred;
4388 /* Handle an access to register REGNO of type FLAGS using predicate register
4389 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4390 a dependency with an earlier instruction in the same group. */
4393 rws_access_regno (regno, flags, pred)
4395 struct reg_flags flags;
4398 int need_barrier = 0;
4400 if (regno >= NUM_REGS)
4403 if (! PR_REGNO_P (regno))
4404 flags.is_and = flags.is_or = 0;
4410 /* One insn writes same reg multiple times? */
4411 if (rws_insn[regno].write_count > 0)
4414 /* Update info for current instruction. */
4415 rws_update (rws_insn, regno, flags, pred);
4416 write_count = rws_sum[regno].write_count;
4418 switch (write_count)
4421 /* The register has not been written yet. */
4422 rws_update (rws_sum, regno, flags, pred);
4426 /* The register has been written via a predicate. If this is
4427 not a complementary predicate, then we need a barrier. */
4428 /* ??? This assumes that P and P+1 are always complementary
4429 predicates for P even. */
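	  /* For instance, "cmp.eq p6, p7 = r8, r9" (hypothetical operands)
	     writes the complementary pair p6/p7 in a single insn, which is
	     why a second write under FIRST_PRED's complement is safe.  */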
4430 if (flags.is_and && rws_sum[regno].written_by_and)
4432 else if (flags.is_or && rws_sum[regno].written_by_or)
4434 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4436 rws_update (rws_sum, regno, flags, pred);
      /* The register has been unconditionally written already.  We
	 need a barrier.  */
4442 if (flags.is_and && rws_sum[regno].written_by_and)
4444 else if (flags.is_or && rws_sum[regno].written_by_or)
4448 rws_sum[regno].written_by_and = flags.is_and;
4449 rws_sum[regno].written_by_or = flags.is_or;
4458 if (flags.is_branch)
      /* Branches have several RAW exceptions that allow us to avoid
	 barriers.  */
4463 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4464 /* RAW dependencies on branch regs are permissible as long
4465 as the writer is a non-branch instruction. Since we
4466 never generate code that uses a branch register written
	       by a branch instruction, handling this case is easy.  */
4471 if (REGNO_REG_CLASS (regno) == PR_REGS
4472 && ! rws_sum[regno].written_by_fp)
4473 /* The predicates of a branch are available within the
4474 same insn group as long as the predicate was written by
4475 something other than a floating-point instruction. */
4479 if (flags.is_and && rws_sum[regno].written_by_and)
4481 if (flags.is_or && rws_sum[regno].written_by_or)
4484 switch (rws_sum[regno].write_count)
4487 /* The register has not been written yet. */
4491 /* The register has been written via a predicate. If this is
4492 not a complementary predicate, then we need a barrier. */
4493 /* ??? This assumes that P and P+1 are always complementary
4494 predicates for P even. */
4495 if ((rws_sum[regno].first_pred ^ 1) != pred)
	    /* The register has been unconditionally written already.  We
	       need a barrier.  */
4510 return need_barrier;
4514 rws_access_reg (reg, flags, pred)
4516 struct reg_flags flags;
4519 int regno = REGNO (reg);
4520 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4523 return rws_access_regno (regno, flags, pred);
4526 int need_barrier = 0;
4528 need_barrier |= rws_access_regno (regno + n, flags, pred);
4529 return need_barrier;
4533 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4534 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4537 update_set_flags (x, pflags, ppred, pcond)
4539 struct reg_flags *pflags;
4543 rtx src = SET_SRC (x);
4547 switch (GET_CODE (src))
4553 if (SET_DEST (x) == pc_rtx)
4554 /* X is a conditional branch. */
4558 int is_complemented = 0;
4560 /* X is a conditional move. */
4561 rtx cond = XEXP (src, 0);
4562 if (GET_CODE (cond) == EQ)
4563 is_complemented = 1;
4564 cond = XEXP (cond, 0);
	  if (GET_CODE (cond) != REG
	      || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4569 if (XEXP (src, 1) == SET_DEST (x)
4570 || XEXP (src, 2) == SET_DEST (x))
	      /* X is a conditional move that conditionally writes the
		 destination.  */
4575 /* We need another complement in this case. */
4576 if (XEXP (src, 1) == SET_DEST (x))
4577 is_complemented = ! is_complemented;
4579 *ppred = REGNO (cond);
4580 if (is_complemented)
4584 /* ??? If this is a conditional write to the dest, then this
4585 instruction does not actually read one source. This probably
4586 doesn't matter, because that source is also the dest. */
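      /* As an illustration (hypothetical registers), such a conditional
	 move looks like

	     (set (reg:DI r32)
		  (if_then_else (ne (reg:BI p6) (const_int 0))
				(reg:DI r33)
				(reg:DI r32)))

	 Since the else-arm is the destination itself, r32 is really only
	 written when p6 is true, so the SET is treated as predicated
	 on p6.  */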
4587 /* ??? Multiple writes to predicate registers are allowed
4588 if they are all AND type compares, or if they are all OR
	 type compares.  We do not generate such instructions
	 currently.  */
4592 /* ... fall through ... */
4595 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4596 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4597 /* Set pflags->is_fp to 1 so that we know we're dealing
4598 with a floating point comparison when processing the
4599 destination of the SET. */
4602 /* Discover if this is a parallel comparison. We only handle
4603 and.orcm and or.andcm at present, since we must retain a
4604 strict inverse on the predicate pair. */
4605 else if (GET_CODE (src) == AND)
4607 else if (GET_CODE (src) == IOR)
4614 /* Subroutine of rtx_needs_barrier; this function determines whether the
4615 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   controlling this SET, if any.  */
4620 set_src_needs_barrier (x, flags, pred, cond)
4622 struct reg_flags flags;
4626 int need_barrier = 0;
4628 rtx src = SET_SRC (x);
4630 if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by a subroutine call.  */
4633 return rtx_needs_barrier (src, flags, pred);
4634 else if (SET_DEST (x) == pc_rtx)
4636 /* X is a conditional branch. */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
4639 flags.is_branch = 1;
4640 return rtx_needs_barrier (src, flags, pred);
4643 need_barrier = rtx_needs_barrier (src, flags, pred);
4645 /* This instruction unconditionally uses a predicate register. */
4647 need_barrier |= rws_access_reg (cond, flags, 0);
4650 if (GET_CODE (dst) == ZERO_EXTRACT)
4652 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4653 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4654 dst = XEXP (dst, 0);
4656 return need_barrier;
4659 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
4661 in the same group. */
4664 rtx_needs_barrier (x, flags, pred)
4666 struct reg_flags flags;
4670 int is_complemented = 0;
4671 int need_barrier = 0;
4672 const char *format_ptr;
4673 struct reg_flags new_flags;
4681 switch (GET_CODE (x))
4684 update_set_flags (x, &new_flags, &pred, &cond);
4685 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4686 if (GET_CODE (SET_SRC (x)) != CALL)
4688 new_flags.is_write = 1;
4689 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4694 new_flags.is_write = 0;
4695 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4697 /* Avoid multiple register writes, in case this is a pattern with
4698 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4699 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4701 new_flags.is_write = 1;
4702 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4703 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4704 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4709 /* X is a predicated instruction. */
4711 cond = COND_EXEC_TEST (x);
4714 need_barrier = rtx_needs_barrier (cond, flags, 0);
4716 if (GET_CODE (cond) == EQ)
4717 is_complemented = 1;
4718 cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4722 pred = REGNO (cond);
4723 if (is_complemented)
4726 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4727 return need_barrier;
4731 /* Clobber & use are for earlier compiler-phases only. */
4736 /* We always emit stop bits for traditional asms. We emit stop bits
4737 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4738 if (GET_CODE (x) != ASM_OPERANDS
4739 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4741 /* Avoid writing the register multiple times if we have multiple
4742 asm outputs. This avoids an abort in rws_access_reg. */
4743 if (! rws_insn[REG_VOLATILE].write_count)
4745 new_flags.is_write = 1;
4746 rws_access_regno (REG_VOLATILE, new_flags, pred);
4751 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike in its
	 normal usage, does not indicate a traditional asm.  */
4756 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4757 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4762 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4764 rtx pat = XVECEXP (x, 0, i);
4765 if (GET_CODE (pat) == SET)
4767 update_set_flags (pat, &new_flags, &pred, &cond);
4768 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4770 else if (GET_CODE (pat) == USE
4771 || GET_CODE (pat) == CALL
4772 || GET_CODE (pat) == ASM_OPERANDS)
4773 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4774 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4777 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4779 rtx pat = XVECEXP (x, 0, i);
4780 if (GET_CODE (pat) == SET)
4782 if (GET_CODE (SET_SRC (pat)) != CALL)
4784 new_flags.is_write = 1;
4785 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4789 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4790 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4798 if (REGNO (x) == AR_UNAT_REGNUM)
4800 for (i = 0; i < 64; ++i)
4801 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4804 need_barrier = rws_access_reg (x, flags, pred);
4808 /* Find the regs used in memory address computation. */
4809 new_flags.is_write = 0;
4810 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4813 case CONST_INT: case CONST_DOUBLE:
4814 case SYMBOL_REF: case LABEL_REF: case CONST:
4817 /* Operators with side-effects. */
4818 case POST_INC: case POST_DEC:
4819 if (GET_CODE (XEXP (x, 0)) != REG)
4822 new_flags.is_write = 0;
4823 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4824 new_flags.is_write = 1;
4825 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4829 if (GET_CODE (XEXP (x, 0)) != REG)
4832 new_flags.is_write = 0;
4833 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4834 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4835 new_flags.is_write = 1;
4836 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4839 /* Handle common unary and binary ops for efficiency. */
4840 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4841 case MOD: case UDIV: case UMOD: case AND: case IOR:
4842 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4843 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4844 case NE: case EQ: case GE: case GT: case LE:
4845 case LT: case GEU: case GTU: case LEU: case LTU:
4846 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4847 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4850 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4851 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4852 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4853 case SQRT: case FFS:
4854 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4858 switch (XINT (x, 1))
4860 case UNSPEC_LTOFF_DTPMOD:
4861 case UNSPEC_LTOFF_DTPREL:
4863 case UNSPEC_LTOFF_TPREL:
4865 case UNSPEC_PRED_REL_MUTEX:
4866 case UNSPEC_PIC_CALL:
4868 case UNSPEC_FETCHADD_ACQ:
4869 case UNSPEC_BSP_VALUE:
4870 case UNSPEC_FLUSHRS:
4871 case UNSPEC_BUNDLE_SELECTOR:
4874 case UNSPEC_GR_SPILL:
4875 case UNSPEC_GR_RESTORE:
4877 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4878 HOST_WIDE_INT bit = (offset >> 3) & 63;
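	    /* E.g. (illustrative): a spill at offset 24 from the UNAT base
	       maps to bit (24 >> 3) & 63 == 3, one bit per 8-byte slot.  */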
4880 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
4882 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4887 case UNSPEC_FR_SPILL:
4888 case UNSPEC_FR_RESTORE:
4890 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4894 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4897 case UNSPEC_FR_RECIP_APPROX:
4898 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4899 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4902 case UNSPEC_CMPXCHG_ACQ:
4903 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4904 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4912 case UNSPEC_VOLATILE:
4913 switch (XINT (x, 1))
4916 /* Alloc must always be the first instruction of a group.
4917 We force this by always returning true. */
4918 /* ??? We might get better scheduling if we explicitly check for
4919 input/local/output register dependencies, and modify the
4920 scheduler so that alloc is always reordered to the start of
4921 the current group. We could then eliminate all of the
4922 first_instruction code. */
4923 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4925 new_flags.is_write = 1;
4926 rws_access_regno (REG_AR_CFM, new_flags, pred);
4929 case UNSPECV_SET_BSP:
4933 case UNSPECV_BLOCKAGE:
4934 case UNSPECV_INSN_GROUP_BARRIER:
4936 case UNSPECV_PSAC_ALL:
4937 case UNSPECV_PSAC_NORMAL:
4946 new_flags.is_write = 0;
4947 need_barrier = rws_access_regno (REG_RP, flags, pred);
4948 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4950 new_flags.is_write = 1;
4951 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4952 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4956 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4957 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4958 switch (format_ptr[i])
4960 case '0': /* unused field */
4961 case 'i': /* integer */
4962 case 'n': /* note */
4963 case 'w': /* wide integer */
4964 case 's': /* pointer to string */
4965 case 'S': /* optional pointer to string */
4969 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4974 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4975 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4984 return need_barrier;
4987 /* Clear out the state for group_barrier_needed_p at the start of a
4988 sequence of insns. */
4991 init_insn_group_barriers ()
4993 memset (rws_sum, 0, sizeof (rws_sum));
4994 first_instruction = 1;
4997 /* Given the current state, recorded by previous calls to this function,
4998 determine whether a group barrier (a stop bit) is necessary before INSN.
4999 Return nonzero if so. */
5002 group_barrier_needed_p (insn)
5006 int need_barrier = 0;
5007 struct reg_flags flags;
5009 memset (&flags, 0, sizeof (flags));
5010 switch (GET_CODE (insn))
5016 /* A barrier doesn't imply an instruction group boundary. */
5020 memset (rws_insn, 0, sizeof (rws_insn));
5024 flags.is_branch = 1;
5025 flags.is_sibcall = SIBLING_CALL_P (insn);
5026 memset (rws_insn, 0, sizeof (rws_insn));
5028 /* Don't bundle a call following another call. */
5029 if ((pat = prev_active_insn (insn))
5030 && GET_CODE (pat) == CALL_INSN)
5036 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5040 flags.is_branch = 1;
5042 /* Don't bundle a jump following a call. */
5043 if ((pat = prev_active_insn (insn))
5044 && GET_CODE (pat) == CALL_INSN)
5052 if (GET_CODE (PATTERN (insn)) == USE
5053 || GET_CODE (PATTERN (insn)) == CLOBBER)
5054 /* Don't care about USE and CLOBBER "insns"---those are used to
5055 indicate to the optimizer that it shouldn't get rid of
5056 certain operations. */
5059 pat = PATTERN (insn);
5061 /* Ug. Hack hacks hacked elsewhere. */
5062 switch (recog_memoized (insn))
5064 /* We play dependency tricks with the epilogue in order
5065 to get proper schedules. Undo this for dv analysis. */
5066 case CODE_FOR_epilogue_deallocate_stack:
5067 case CODE_FOR_prologue_allocate_stack:
5068 pat = XVECEXP (pat, 0, 0);
5071 /* The pattern we use for br.cloop confuses the code above.
5072 The second element of the vector is representative. */
5073 case CODE_FOR_doloop_end_internal:
5074 pat = XVECEXP (pat, 0, 1);
5077 /* Doesn't generate code. */
5078 case CODE_FOR_pred_rel_mutex:
5079 case CODE_FOR_prologue_use:
5086 memset (rws_insn, 0, sizeof (rws_insn));
5087 need_barrier = rtx_needs_barrier (pat, flags, 0);
      /* Check to see if the previous instruction was a volatile
	 asm.  */
5092 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5099 if (first_instruction)
5102 first_instruction = 0;
5105 return need_barrier;
5108 /* Like group_barrier_needed_p, but do not clobber the current state. */
5111 safe_group_barrier_needed_p (insn)
5114 struct reg_write_state rws_saved[NUM_REGS];
5115 int saved_first_instruction;
5118 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5119 saved_first_instruction = first_instruction;
5121 t = group_barrier_needed_p (insn);
5123 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5124 first_instruction = saved_first_instruction;
5129 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
5131 a final instruction scheduling pass has been run which has already
5132 inserted most of the necessary stop bits. This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.  */
5137 emit_insn_group_barriers (dump, insns)
5143 int insns_since_last_label = 0;
5145 init_insn_group_barriers ();
5147 for (insn = insns; insn; insn = NEXT_INSN (insn))
5149 if (GET_CODE (insn) == CODE_LABEL)
5151 if (insns_since_last_label)
5153 insns_since_last_label = 0;
5155 else if (GET_CODE (insn) == NOTE
5156 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5158 if (insns_since_last_label)
5160 insns_since_last_label = 0;
5162 else if (GET_CODE (insn) == INSN
5163 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5164 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5166 init_insn_group_barriers ();
5169 else if (INSN_P (insn))
5171 insns_since_last_label = 1;
5173 if (group_barrier_needed_p (insn))
5178 fprintf (dump, "Emitting stop before label %d\n",
5179 INSN_UID (last_label));
5180 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5183 init_insn_group_barriers ();
5191 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5192 This function has to emit all necessary group barriers. */
5195 emit_all_insn_group_barriers (dump, insns)
5196 FILE *dump ATTRIBUTE_UNUSED;
5201 init_insn_group_barriers ();
5203 for (insn = insns; insn; insn = NEXT_INSN (insn))
5205 if (GET_CODE (insn) == BARRIER)
5207 rtx last = prev_active_insn (insn);
5211 if (GET_CODE (last) == JUMP_INSN
5212 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5213 last = prev_active_insn (last);
5214 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5215 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5217 init_insn_group_barriers ();
5219 else if (INSN_P (insn))
5221 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5222 init_insn_group_barriers ();
5223 else if (group_barrier_needed_p (insn))
5225 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5226 init_insn_group_barriers ();
5227 group_barrier_needed_p (insn);
5233 static int errata_find_address_regs PARAMS ((rtx *, void *));
5234 static void errata_emit_nops PARAMS ((rtx));
5235 static void fixup_errata PARAMS ((void));
/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
5242 HARD_REG_SET p_reg_set;
5243 HARD_REG_SET gr_reg_conditionally_set;
5246 /* Index into the last_group array. */
5247 static int group_idx;
5249 /* Called through for_each_rtx; determines if a hard register that was
5250 conditionally set in the previous group is used as an address register.
5251 It ensures that for_each_rtx returns 1 in that case. */
5253 errata_find_address_regs (xp, data)
5255 void *data ATTRIBUTE_UNUSED;
5258 if (GET_CODE (x) != MEM)
5261 if (GET_CODE (x) == POST_MODIFY)
5263 if (GET_CODE (x) == REG)
5265 struct group *prev_group = last_group + (group_idx ^ 1);
5266 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5274 /* Called for each insn; this function keeps track of the state in
5275 last_group and emits additional NOPs if necessary to work around
5276 an Itanium A/B step erratum. */
5278 errata_emit_nops (insn)
5281 struct group *this_group = last_group + group_idx;
5282 struct group *prev_group = last_group + (group_idx ^ 1);
5283 rtx pat = PATTERN (insn);
5284 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5285 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5286 enum attr_type type;
5289 if (GET_CODE (real_pat) == USE
5290 || GET_CODE (real_pat) == CLOBBER
5291 || GET_CODE (real_pat) == ASM_INPUT
5292 || GET_CODE (real_pat) == ADDR_VEC
5293 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5294 || asm_noperands (PATTERN (insn)) >= 0)
  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     part of it here.  */
5300 if (GET_CODE (set) == PARALLEL)
5303 set = XVECEXP (real_pat, 0, 0);
5304 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5305 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5306 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5313 if (set && GET_CODE (set) != SET)
5316 type = get_attr_type (insn);
5319 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5320 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5322 if ((type == TYPE_M || type == TYPE_A) && cond && set
5323 && REG_P (SET_DEST (set))
5324 && GET_CODE (SET_SRC (set)) != PLUS
5325 && GET_CODE (SET_SRC (set)) != MINUS
5326 && (GET_CODE (SET_SRC (set)) != ASHIFT
5327 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5328 && (GET_CODE (SET_SRC (set)) != MEM
5329 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5330 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5332 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5333 || ! REG_P (XEXP (cond, 0)))
5336 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5337 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5339 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5341 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5342 emit_insn_before (gen_nop (), insn);
5343 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
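      /* The emitted workaround thus looks like (illustrative):

		;;		// insn group barrier
		nop 0
		;;		// insn group barrier
		<offending insn>

	 pushing the use of the conditionally set address register into a
	 later instruction group.  */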
5345 memset (last_group, 0, sizeof last_group);
5349 /* Emit extra nops if they are required to work around hardware errata. */
5356 if (! TARGET_B_STEP)
5360 memset (last_group, 0, sizeof last_group);
5362 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5367 if (ia64_safe_type (insn) == TYPE_S)
5370 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5373 errata_emit_nops (insn);
5377 /* Instruction scheduling support. */
5378 /* Describe one bundle. */
5382 /* Zero if there's no possibility of a stop in this bundle other than
5383 at the end, otherwise the position of the optional stop bit. */
5385 /* The types of the three slots. */
5386 enum attr_type t[3];
5387 /* The pseudo op to be emitted into the assembler output. */
5391 #define NR_BUNDLES 10
5393 /* A list of all available bundles. */
5395 static const struct bundle bundle[NR_BUNDLES] =
5397 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5398 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5399 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5400 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5401 #if NR_BUNDLES == 10
5402 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5403 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5405 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5406 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5407 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5408 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
5411 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
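/* As an illustration (hypothetical operands), the ".mmi" entry above, with
   possible_stop == 1, corresponds to assembler output like

	{ .mmi
	  ld8 r14 = [r32] ;;
	  ld8 r15 = [r14]
	  add r16 = r15, r17
	}

   where the optional mid-bundle stop bit sits just before slot 1.  */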
5414 /* Describe a packet of instructions. Packets consist of two bundles that
5415 are visible to the hardware in one scheduling window. */
5419 const struct bundle *t1, *t2;
5420 /* Precomputed value of the first split issue in this packet if a cycle
5421 starts at its beginning. */
5423 /* For convenience, the insn types are replicated here so we don't have
5424 to go through T1 and T2 all the time. */
5425 enum attr_type t[6];
5428 /* An array containing all possible packets. */
5429 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5430 static struct ia64_packet packets[NR_PACKETS];
5432 /* Map attr_type to a string with the name. */
5434 static const char *const type_names[] =
5436 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5439 /* Nonzero if we should insert stop bits into the schedule. */
5440 int ia64_final_schedule = 0;
5442 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5443 static rtx ia64_single_set PARAMS ((rtx));
5444 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5445 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5446 static void maybe_rotate PARAMS ((FILE *));
5447 static void finish_last_head PARAMS ((FILE *, int));
5448 static void rotate_one_bundle PARAMS ((FILE *));
5449 static void rotate_two_bundles PARAMS ((FILE *));
5450 static void nop_cycles_until PARAMS ((int, FILE *));
5451 static void cycle_end_fill_slots PARAMS ((FILE *));
5452 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5453 static int get_split PARAMS ((const struct ia64_packet *, int));
5454 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5455 const struct ia64_packet *, int));
5456 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5457 rtx *, enum attr_type *, int));
5458 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5459 static void dump_current_packet PARAMS ((FILE *));
5460 static void schedule_stop PARAMS ((FILE *));
5461 static rtx gen_nop_type PARAMS ((enum attr_type));
5462 static void ia64_emit_nops PARAMS ((void));
5464 /* Map a bundle number to its pseudo-op. */
5470 return bundle[b].name;
5473 /* Compute the slot which will cause a split issue in packet P if the
5474 current cycle begins at slot BEGIN. */
5477 itanium_split_issue (p, begin)
5478 const struct ia64_packet *p;
5481 int type_count[TYPE_S];
5487 /* Always split before and after MMF. */
5488 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5490 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5492 /* Always split after MBB and BBB. */
5493 if (p->t[1] == TYPE_B)
5495 /* Split after first bundle in MIB BBB combination. */
5496 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5500 memset (type_count, 0, sizeof type_count);
5501 for (i = begin; i < split; i++)
5503 enum attr_type t0 = p->t[i];
5504 /* An MLX bundle reserves the same units as an MFI bundle. */
5505 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5506 : t0 == TYPE_X ? TYPE_I
5509 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5510 2 integer per cycle. */
5511 int max = (t == TYPE_B ? 3 : 2);
5512 if (type_count[t] == max)
5520 /* Return the maximum number of instructions a cpu can issue. */
5528 /* Helper function - like single_set, but look inside COND_EXEC. */
5531 ia64_single_set (insn)
5534 rtx x = PATTERN (insn), ret;
5535 if (GET_CODE (x) == COND_EXEC)
5536 x = COND_EXEC_CODE (x);
5537 if (GET_CODE (x) == SET)
  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
     Although they are not classical single sets, the second set is there
     just to keep the insn from moving past FP-relative stack accesses.  */
5543 switch (recog_memoized (insn))
5545 case CODE_FOR_prologue_allocate_stack:
5546 case CODE_FOR_epilogue_deallocate_stack:
5547 ret = XVECEXP (x, 0, 0);
5551 ret = single_set_2 (insn, x);
5558 /* Adjust the cost of a scheduling dependency. Return the new cost of
5559 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5562 ia64_adjust_cost (insn, link, dep_insn, cost)
5563 rtx insn, link, dep_insn;
5566 enum attr_type dep_type;
5567 enum attr_itanium_class dep_class;
5568 enum attr_itanium_class insn_class;
5569 rtx dep_set, set, src, addr;
5571 if (GET_CODE (PATTERN (insn)) == CLOBBER
5572 || GET_CODE (PATTERN (insn)) == USE
5573 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5574 || GET_CODE (PATTERN (dep_insn)) == USE
5575 /* @@@ Not accurate for indirect calls. */
5576 || GET_CODE (insn) == CALL_INSN
5577 || ia64_safe_type (insn) == TYPE_S)
5580 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5581 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5584 dep_type = ia64_safe_type (dep_insn);
5585 dep_class = ia64_safe_itanium_class (dep_insn);
5586 insn_class = ia64_safe_itanium_class (insn);
  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
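  /* E.g. (illustrative):

	     cmp.eq p6, p7 = r8, r9
	(p6) br.cond.dptk .Lskip

     may issue in a single instruction group, with no stop bit in
     between.  */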
5590 dep_set = ia64_single_set (dep_insn);
5591 set = ia64_single_set (insn);
5593 if (dep_type != TYPE_F
5595 && GET_CODE (SET_DEST (dep_set)) == REG
5596 && PR_REG (REGNO (SET_DEST (dep_set)))
5597 && GET_CODE (insn) == JUMP_INSN)
5600 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
      /* ??? Can't find any information in the documentation about whether
	 a sequence such as

	   st [rx] = ra
	   ld rb = [ry]

	 splits issue.  Assume it doesn't.  */
5610 src = set ? SET_SRC (set) : 0;
5614 if (GET_CODE (SET_DEST (set)) == MEM)
5615 addr = XEXP (SET_DEST (set), 0);
5616 else if (GET_CODE (SET_DEST (set)) == SUBREG
5617 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5618 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5622 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5623 addr = XVECEXP (addr, 0, 0);
5624 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5625 addr = XEXP (addr, 0);
5626 if (GET_CODE (addr) == MEM)
5627 addr = XEXP (addr, 0);
5633 if (addr && GET_CODE (addr) == POST_MODIFY)
5634 addr = XEXP (addr, 0);
5636 set = ia64_single_set (dep_insn);
5638 if ((dep_class == ITANIUM_CLASS_IALU
5639 || dep_class == ITANIUM_CLASS_ILOG
5640 || dep_class == ITANIUM_CLASS_LD)
5641 && (insn_class == ITANIUM_CLASS_LD
5642 || insn_class == ITANIUM_CLASS_ST))
5644 if (! addr || ! set)
5646 /* This isn't completely correct - an IALU that feeds an address has
5647 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5648 otherwise. Unfortunately there's no good way to describe this. */
5649 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5653 if ((dep_class == ITANIUM_CLASS_IALU
5654 || dep_class == ITANIUM_CLASS_ILOG
5655 || dep_class == ITANIUM_CLASS_LD)
5656 && (insn_class == ITANIUM_CLASS_MMMUL
5657 || insn_class == ITANIUM_CLASS_MMSHF
5658 || insn_class == ITANIUM_CLASS_MMSHFI))
5661 if (dep_class == ITANIUM_CLASS_FMAC
5662 && (insn_class == ITANIUM_CLASS_FMISC
5663 || insn_class == ITANIUM_CLASS_FCVTFX
5664 || insn_class == ITANIUM_CLASS_XMPY))
5667 if ((dep_class == ITANIUM_CLASS_FMAC
5668 || dep_class == ITANIUM_CLASS_FMISC
5669 || dep_class == ITANIUM_CLASS_FCVTFX
5670 || dep_class == ITANIUM_CLASS_XMPY)
5671 && insn_class == ITANIUM_CLASS_STF)
5674 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5675 but HP engineers say any non-MM operation. */
5676 if ((dep_class == ITANIUM_CLASS_MMMUL
5677 || dep_class == ITANIUM_CLASS_MMSHF
5678 || dep_class == ITANIUM_CLASS_MMSHFI)
5679 && insn_class != ITANIUM_CLASS_MMMUL
5680 && insn_class != ITANIUM_CLASS_MMSHF
5681 && insn_class != ITANIUM_CLASS_MMSHFI)
5687 /* Describe the current state of the Itanium pipeline. */
5690 /* The first slot that is used in the current cycle. */
5692 /* The next slot to fill. */
5694 /* The packet we have selected for the current issue window. */
5695 const struct ia64_packet *packet;
5696 /* The position of the split issue that occurs due to issue width
5697 limitations (6 if there's no split issue). */
5699 /* Record data about the insns scheduled so far in the same issue
5700 window. The elements up to but not including FIRST_SLOT belong
5701 to the previous cycle, the ones starting with FIRST_SLOT belong
5702 to the current cycle. */
5703 enum attr_type types[6];
5706 /* Nonzero if we decided to schedule a stop bit. */
5710 /* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling the current block.
5712 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5713 static rtx *sched_ready;
5714 static enum attr_type *sched_types;
/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.  */
5720 insn_matches_slot (p, itype, slot, insn)
5721 const struct ia64_packet *p;
5722 enum attr_type itype;
5726 enum attr_itanium_requires_unit0 u0;
5727 enum attr_type stype = p->t[slot];
5731 u0 = ia64_safe_itanium_requires_unit0 (insn);
5732 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5735 for (i = sched_data.first_slot; i < slot; i++)
5736 if (p->t[i] == stype
5737 || (stype == TYPE_F && p->t[i] == TYPE_L)
5738 || (stype == TYPE_I && p->t[i] == TYPE_X))
5741 if (GET_CODE (insn) == CALL_INSN)
5743 /* Reject calls in multiway branch packets. We want to limit
5744 the number of multiway branches we generate (since the branch
5745 predictor is limited), and this seems to work fairly well.
5746 (If we didn't do this, we'd have to add another test here to
5747 force calls into the third slot of the bundle.) */
5750 if (p->t[1] == TYPE_B)
5755 if (p->t[4] == TYPE_B)
5763 if (itype == TYPE_A)
5764 return stype == TYPE_M || stype == TYPE_I;
5768 /* Like emit_insn_before, but skip cycle_display notes.
5769 ??? When cycle display notes are implemented, update this. */
5772 ia64_emit_insn_before (insn, before)
5775 emit_insn_before (insn, before);
5778 /* When rotating a bundle out of the issue window, insert a bundle selector
5779 insn in front of it. DUMP is the scheduling dump file or NULL. START
5780 is either 0 or 3, depending on whether we want to emit a bundle selector
5781 for the first bundle or the second bundle in the current issue window.
5783 The selector insns are emitted this late because the selected packet can
5784 be changed until parts of it get rotated out. */
5787 finish_last_head (dump, start)
5791 const struct ia64_packet *p = sched_data.packet;
5792 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5793 int bundle_type = b - bundle;
5797 if (! ia64_final_schedule)
5800 for (i = start; sched_data.insns[i] == 0; i++)
5803 insn = sched_data.insns[i];
5806 fprintf (dump, "// Emitting template before %d: %s\n",
5807 INSN_UID (insn), b->name);
5809 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5812 /* We can't schedule more insns this cycle. Fix up the scheduling state
5813 and advance FIRST_SLOT and CUR.
5814 We have to distribute the insns that are currently found between
5815 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5816 far, they are stored successively in the fields starting at FIRST_SLOT;
5817 now they must be moved to the correct slots.
5818 DUMP is the current scheduling dump file, or NULL. */
5821 cycle_end_fill_slots (dump)
5824 const struct ia64_packet *packet = sched_data.packet;
5826 enum attr_type tmp_types[6];
5829 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5830 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5832 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5834 enum attr_type t = tmp_types[i];
5835 if (t != ia64_safe_type (tmp_insns[i]))
5837 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5839 if (slot > sched_data.split)
5842 fprintf (dump, "// Packet needs %s, have %s\n",
5843 type_names[packet->t[slot]], type_names[t]);
5844 sched_data.types[slot] = packet->t[slot];
5845 sched_data.insns[slot] = 0;
5846 sched_data.stopbit[slot] = 0;
5848 /* ??? TYPE_L instructions always fill up two slots, but we don't
5849 support TYPE_L nops. */
5850 if (packet->t[slot] == TYPE_L)
5856 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5857 actual slot type later. */
5858 sched_data.types[slot] = packet->t[slot];
5859 sched_data.insns[slot] = tmp_insns[i];
5860 sched_data.stopbit[slot] = 0;
5863 /* TYPE_L instructions always fill up two slots. */
5866 sched_data.types[slot] = packet->t[slot];
5867 sched_data.insns[slot] = 0;
5868 sched_data.stopbit[slot] = 0;
5873 /* This isn't right - there's no need to pad out until the forced split;
5874 the CPU will automatically split if an insn isn't ready. */
5876 while (slot < sched_data.split)
5878 sched_data.types[slot] = packet->t[slot];
5879 sched_data.insns[slot] = 0;
5880 sched_data.stopbit[slot] = 0;
5885 sched_data.first_slot = sched_data.cur = slot;
5888 /* Bundle rotations, as described in the Itanium optimization manual.
5889 We can rotate either one or both bundles out of the issue window.
5890 DUMP is the current scheduling dump file, or NULL. */
5893 rotate_one_bundle (dump)
5897 fprintf (dump, "// Rotating one bundle.\n");
5899 finish_last_head (dump, 0);
5900 if (sched_data.cur > 3)
5902 sched_data.cur -= 3;
5903 sched_data.first_slot -= 3;
5904 memmove (sched_data.types,
5905 sched_data.types + 3,
5906 sched_data.cur * sizeof *sched_data.types);
5907 memmove (sched_data.stopbit,
5908 sched_data.stopbit + 3,
5909 sched_data.cur * sizeof *sched_data.stopbit);
5910 memmove (sched_data.insns,
5911 sched_data.insns + 3,
5912 sched_data.cur * sizeof *sched_data.insns);
5914 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5919 sched_data.first_slot = 0;
5924 rotate_two_bundles (dump)
5928 fprintf (dump, "// Rotating two bundles.\n");
5930 if (sched_data.cur == 0)
5933 finish_last_head (dump, 0);
5934 if (sched_data.cur > 3)
5935 finish_last_head (dump, 3);
5937 sched_data.first_slot = 0;
5940 /* We're beginning a new block. Initialize data structures as necessary. */
5943 ia64_sched_init (dump, sched_verbose, max_ready)
5944 FILE *dump ATTRIBUTE_UNUSED;
5945 int sched_verbose ATTRIBUTE_UNUSED;
5948 static int initialized = 0;
5956 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5958 const struct bundle *t1 = bundle + b1;
5959 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5961 const struct bundle *t2 = bundle + b2;
5967 for (i = 0; i < NR_PACKETS; i++)
5970 for (j = 0; j < 3; j++)
5971 packets[i].t[j] = packets[i].t1->t[j];
5972 for (j = 0; j < 3; j++)
5973 packets[i].t[j + 3] = packets[i].t2->t[j];
5974 packets[i].first_split = itanium_split_issue (packets + i, 0);
5979 init_insn_group_barriers ();
5981 memset (&sched_data, 0, sizeof sched_data);
5982 sched_types = (enum attr_type *) xmalloc (max_ready
5983 * sizeof (enum attr_type));
5984 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5987 /* See if the packet P can match the insns we have already scheduled. Return
5988 nonzero if so. In *PSLOT, we store the first slot that is available for
5989 more instructions if we choose this packet.
   SPLIT holds the last slot we can use; there's a split issue after it, so
5991 scheduling beyond it would cause us to use more than one cycle. */
5994 packet_matches_p (p, split, pslot)
5995 const struct ia64_packet *p;
5999 int filled = sched_data.cur;
6000 int first = sched_data.first_slot;
  /* First, check if the first of the two bundles must be a specific one (due
     to stop bits).  */
6005 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6007 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6010 for (i = 0; i < first; i++)
6011 if (! insn_matches_slot (p, sched_data.types[i], i,
6012 sched_data.insns[i]))
6014 for (i = slot = first; i < filled; i++)
6016 while (slot < split)
6018 if (insn_matches_slot (p, sched_data.types[i], slot,
6019 sched_data.insns[i]))
6033 /* A frontend for itanium_split_issue. For a packet P and a slot
6034 number FIRST that describes the start of the current clock cycle,
6035 return the slot number of the first split issue. This function
6036 uses the cached number found in P if possible. */
6039 get_split (p, first)
6040 const struct ia64_packet *p;
6044 return p->first_split;
6045 return itanium_split_issue (p, first);
6048 /* Given N_READY insns in the array READY, whose types are found in the
6049 corresponding array TYPES, return the insn that is best suited to be
6050 scheduled in slot SLOT of packet P. */
6053 find_best_insn (ready, types, n_ready, p, slot)
6055 enum attr_type *types;
6057 const struct ia64_packet *p;
6062 while (n_ready-- > 0)
6064 rtx insn = ready[n_ready];
6067 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6069 /* If we have equally good insns, one of which has a stricter
6070 slot requirement, prefer the one with the stricter requirement. */
6071 if (best >= 0 && types[n_ready] == TYPE_A)
6073 if (insn_matches_slot (p, types[n_ready], slot, insn))
6076 best_pri = INSN_PRIORITY (ready[best]);
	  /* If there's no way we could get a stricter requirement, stop
	     looking now.  */
6080 if (types[n_ready] != TYPE_A
6081 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
/* Select the best packet to use given the current scheduler state and the
   current ready list.
6091 READY is an array holding N_READY ready insns; TYPES is a corresponding
6092 array that holds their types. Store the best packet in *PPACKET and the
6093 number of insns that can be scheduled in the current cycle in *PBEST. */
6096 find_best_packet (pbest, ppacket, ready, types, n_ready)
6098 const struct ia64_packet **ppacket;
6100 enum attr_type *types;
6103 int first = sched_data.first_slot;
6106 const struct ia64_packet *best_packet = NULL;
6109 for (i = 0; i < NR_PACKETS; i++)
6111 const struct ia64_packet *p = packets + i;
6113 int split = get_split (p, first);
6115 int first_slot, last_slot;
6118 if (! packet_matches_p (p, split, &first_slot))
6121 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6125 for (slot = first_slot; slot < split; slot++)
6129 /* Disallow a degenerate case where the first bundle doesn't
6130 contain anything but NOPs! */
6131 if (first_slot == 0 && win == 0 && slot == 3)
6137 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6140 sched_ready[insn_nr] = 0;
6144 else if (p->t[slot] == TYPE_B)
6147 /* We must disallow MBB/BBB packets if any of their B slots would be
6148 filled with nops. */
6151 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6156 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6161 || (win == best && last_slot < lowest_end))
6164 lowest_end = last_slot;
6169 *ppacket = best_packet;
6172 /* Reorder the ready list so that the insns that can be issued in this cycle
6173 are found in the correct order at the end of the list.
6174 DUMP is the scheduling dump file, or NULL. READY points to the start,
6175 E_READY to the end of the ready list. MAY_FAIL determines what should be
6176 done if no insns can be scheduled in this cycle: if it is zero, we abort,
6177 otherwise we return 0.
6178 Return 1 if any insns can be scheduled in this cycle. */
6181 itanium_reorder (dump, ready, e_ready, may_fail)
6187 const struct ia64_packet *best_packet;
6188 int n_ready = e_ready - ready;
6189 int first = sched_data.first_slot;
6190 int i, best, best_split, filled;
6192 for (i = 0; i < n_ready; i++)
6193 sched_types[i] = ia64_safe_type (ready[i]);
6195 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6206 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6207 best_packet->t1->name,
6208 best_packet->t2 ? best_packet->t2->name : NULL, best);
6211 best_split = itanium_split_issue (best_packet, first);
6212 packet_matches_p (best_packet, best_split, &filled);
6214 for (i = filled; i < best_split; i++)
6218 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6221 rtx insn = ready[insn_nr];
6222 memmove (ready + insn_nr, ready + insn_nr + 1,
6223 (n_ready - insn_nr - 1) * sizeof (rtx));
6224 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6225 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6226 ready[--n_ready] = insn;
6230 sched_data.packet = best_packet;
6231 sched_data.split = best_split;
6235 /* Dump information about the current scheduling state to file DUMP. */
6238 dump_current_packet (dump)
6242 fprintf (dump, "// %d slots filled:", sched_data.cur);
6243 for (i = 0; i < sched_data.first_slot; i++)
6245 rtx insn = sched_data.insns[i];
6246 fprintf (dump, " %s", type_names[sched_data.types[i]]);
6248 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6249 if (sched_data.stopbit[i])
6250 fprintf (dump, " ;;");
6252 fprintf (dump, " :::");
6253 for (i = sched_data.first_slot; i < sched_data.cur; i++)
6255 rtx insn = sched_data.insns[i];
6256 enum attr_type t = ia64_safe_type (insn);
6257 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6259 fprintf (dump, "\n");
/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
   NULL.  */
6266 schedule_stop (dump)
6269 const struct ia64_packet *best = sched_data.packet;
6274 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6276 if (sched_data.cur == 0)
6279 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6281 rotate_two_bundles (NULL);
6285 for (i = -1; i < NR_PACKETS; i++)
6287 /* This is a slight hack to give the current packet the first chance.
6288 This is done to avoid e.g. switching from MIB to MBB bundles. */
6289 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6290 int split = get_split (p, sched_data.first_slot);
6291 const struct bundle *compare;
6294 if (! packet_matches_p (p, split, &next))
6297 compare = next > 3 ? p->t2 : p->t1;
6300 if (compare->possible_stop)
6301 stoppos = compare->possible_stop;
6305 if (stoppos < next || stoppos >= best_stop)
6307 if (compare->possible_stop == 0)
6309 stoppos = (next > 3 ? 6 : 3);
6311 if (stoppos < next || stoppos >= best_stop)
6315 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6316 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6319 best_stop = stoppos;
6323 sched_data.packet = best;
6324 cycle_end_fill_slots (dump);
6325 while (sched_data.cur < best_stop)
6327 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6328 sched_data.insns[sched_data.cur] = 0;
6329 sched_data.stopbit[sched_data.cur] = 0;
6332 sched_data.stopbit[sched_data.cur - 1] = 1;
6333 sched_data.first_slot = best_stop;
6336 dump_current_packet (dump);
6339 /* If necessary, perform one or two rotations on the scheduling state.
6340 This should only be called if we are starting a new cycle. */
6346 cycle_end_fill_slots (dump);
6347 if (sched_data.cur == 6)
6348 rotate_two_bundles (dump);
6349 else if (sched_data.cur >= 3)
6350 rotate_one_bundle (dump);
6351 sched_data.first_slot = sched_data.cur;
6354 /* The clock cycle when ia64_sched_reorder was last called. */
6355 static int prev_cycle;
6357 /* The first insn scheduled in the previous cycle. This is the saved
6358 value of sched_data.first_slot. */
6359 static int prev_first;
6361 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6362 pad out the delay between MM (shifts, etc.) and integer operations. */
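/* Each full filler cycle emitted below is an ".mii" bundle of nops ending
   in a stop bit, roughly (illustrative):

	{ .mii
	  nop.m 0
	  nop.i 0
	  nop.i 0
	} ;;

   which burns one issue cycle so the MM result has time to become
   ready.  */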
6365 nop_cycles_until (clock_var, dump)
6369 int prev_clock = prev_cycle;
6370 int cycles_left = clock_var - prev_clock;
6371 bool did_stop = false;
6373 /* Finish the previous cycle; pad it out with NOPs. */
6374 if (sched_data.cur == 3)
6376 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6378 maybe_rotate (dump);
6380 else if (sched_data.cur > 0)
6383 int split = itanium_split_issue (sched_data.packet, prev_first);
6385 if (sched_data.cur < 3 && split > 3)
6391 if (split > sched_data.cur)
6394 for (i = sched_data.cur; i < split; i++)
6396 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6397 sched_data.types[i] = sched_data.packet->t[i];
6398 sched_data.insns[i] = t;
6399 sched_data.stopbit[i] = 0;
6401 sched_data.cur = split;
6404 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6408 for (i = sched_data.cur; i < 6; i++)
6410 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6411 sched_data.types[i] = sched_data.packet->t[i];
6412 sched_data.insns[i] = t;
6413 sched_data.stopbit[i] = 0;
6420 if (need_stop || sched_data.cur == 6)
6422 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6425 maybe_rotate (dump);
6429 while (cycles_left > 0)
6431 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6432 sched_emit_insn (gen_nop_type (TYPE_M));
6433 sched_emit_insn (gen_nop_type (TYPE_I));
6434 if (cycles_left > 1)
6436 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6439 sched_emit_insn (gen_nop_type (TYPE_I));
6440 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6446 init_insn_group_barriers ();
/* We are about to begin issuing insns for this clock cycle.
6450 Override the default sort algorithm to better slot instructions. */
6453 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6454 reorder_type, clock_var)
6455 FILE *dump ATTRIBUTE_UNUSED;
6456 int sched_verbose ATTRIBUTE_UNUSED;
6459 int reorder_type, clock_var;
6462 int n_ready = *pn_ready;
6463 rtx *e_ready = ready + n_ready;
6468 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6469 dump_current_packet (dump);
  /* Work around the pipeline flush that will occur if the results of
     an MM instruction are accessed before the result is ready.  Intel
     documentation says this only happens with IALU, ISHF, ILOG, LD,
     and ST consumers, but experimental evidence shows that *any* non-MM
     type instruction will incur the flush.  */
6477 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6479 for (insnp = ready; insnp < e_ready; insnp++)
6481 rtx insn = *insnp, link;
6482 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6484 if (t == ITANIUM_CLASS_MMMUL
6485 || t == ITANIUM_CLASS_MMSHF
6486 || t == ITANIUM_CLASS_MMSHFI)
6489 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6490 if (REG_NOTE_KIND (link) == 0)
6492 rtx other = XEXP (link, 0);
6493 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6494 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6496 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6504 prev_first = sched_data.first_slot;
6505 prev_cycle = clock_var;
6507 if (reorder_type == 0)
6508 maybe_rotate (sched_verbose ? dump : NULL);
6510 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6512 for (insnp = ready; insnp < e_ready; insnp++)
6513 if (insnp < e_ready)
6516 enum attr_type t = ia64_safe_type (insn);
6517 if (t == TYPE_UNKNOWN)
6519 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6520 || asm_noperands (PATTERN (insn)) >= 0)
6522 rtx lowest = ready[n_asms];
6523 ready[n_asms] = insn;
6529 rtx highest = ready[n_ready - 1];
6530 ready[n_ready - 1] = insn;
6532 if (ia64_final_schedule && group_barrier_needed_p (insn))
6534 schedule_stop (sched_verbose ? dump : NULL);
6535 sched_data.last_was_stop = 1;
6536 maybe_rotate (sched_verbose ? dump : NULL);
6543 if (n_asms < n_ready)
6545 /* Some normal insns to process. Skip the asms. */
6549 else if (n_ready > 0)
6551 /* Only asm insns left. */
6552 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6554 schedule_stop (sched_verbose ? dump : NULL);
6555 sched_data.last_was_stop = 1;
6556 maybe_rotate (sched_verbose ? dump : NULL);
6558 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6562 if (ia64_final_schedule)
6564 int nr_need_stop = 0;
6566 for (insnp = ready; insnp < e_ready; insnp++)
6567 if (safe_group_barrier_needed_p (*insnp))
6570 /* Schedule a stop bit if
6571 - all insns require a stop bit, or
6572 - we are starting a new cycle and _any_ insns require a stop bit.
6573 The reason for the latter is that if our schedule is accurate, then
6574 the additional stop won't decrease performance at this point (since
6575 there's a split issue at this point anyway), but it gives us more
6576 freedom when scheduling the currently ready insns. */
6577 if ((reorder_type == 0 && nr_need_stop)
6578 || (reorder_type == 1 && n_ready == nr_need_stop))
6580 schedule_stop (sched_verbose ? dump : NULL);
6581 sched_data.last_was_stop = 1;
6582 maybe_rotate (sched_verbose ? dump : NULL);
6583 if (reorder_type == 1)
	  /* Move down everything that needs a stop bit, preserving relative
	     order.  */
6592 while (insnp-- > ready + deleted)
6593 while (insnp >= ready + deleted)
6596 if (! safe_group_barrier_needed_p (insn))
6598 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6604 if (deleted != nr_need_stop)
6609 return itanium_reorder (sched_verbose ? dump : NULL,
6610 ready, e_ready, reorder_type == 1);
6614 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6621 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6622 pn_ready, 0, clock_var);
6625 /* Like ia64_sched_reorder, but called after issuing each insn.
6626 Override the default sort algorithm to better slot instructions. */
6629 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6630 FILE *dump ATTRIBUTE_UNUSED;
6631 int sched_verbose ATTRIBUTE_UNUSED;
6636 if (sched_data.last_was_stop)
  /* Detect one special case and try to optimize it.
     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
  if (sched_data.first_slot == 1
      && sched_data.stopbit[0]
      && ((sched_data.cur == 4
           && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
           && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
           && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
          || (sched_data.cur == 3
              && (sched_data.types[1] == TYPE_M
                  || sched_data.types[1] == TYPE_A)
              && (sched_data.types[2] != TYPE_M
                  && sched_data.types[2] != TYPE_I
                  && sched_data.types[2] != TYPE_A))))
    {
      int i, best;
      rtx stop = sched_data.insns[1];

      /* Search backward for the stop bit that must be there.  */
      while (1)
        {
          int insn_code;

          stop = PREV_INSN (stop);
          if (GET_CODE (stop) != INSN)
            abort ();
          insn_code = recog_memoized (stop);

          /* Ignore .pred.rel.mutex.

             ??? Update this to ignore cycle display notes too
             ??? once those are implemented.  */
          if (insn_code == CODE_FOR_pred_rel_mutex
              || insn_code == CODE_FOR_prologue_use)
            continue;

          if (insn_code == CODE_FOR_insn_group_barrier)
            break;
          abort ();
        }

      /* Adjust the stop bit's slot selector.  */
      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
        abort ();
      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);

      sched_data.stopbit[0] = 0;
      sched_data.stopbit[2] = 1;

      sched_data.types[5] = sched_data.types[3];
      sched_data.types[4] = sched_data.types[2];
      sched_data.types[3] = sched_data.types[1];
      sched_data.insns[5] = sched_data.insns[3];
      sched_data.insns[4] = sched_data.insns[2];
      sched_data.insns[3] = sched_data.insns[1];
      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;

      sched_data.cur += 2;
      sched_data.first_slot = 3;
      for (i = 0; i < NR_PACKETS; i++)
        {
          const struct ia64_packet *p = packets + i;
          if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
            {
              sched_data.packet = p;
              break;
            }
        }
      rotate_one_bundle (sched_verbose ? dump : NULL);

      best = 6;
      for (i = 0; i < NR_PACKETS; i++)
        {
          const struct ia64_packet *p = packets + i;
          int split = get_split (p, sched_data.first_slot);
          int next;

          /* Disallow multiway branches here.  */
          if (p->t[1] == TYPE_B)
            continue;

          if (packet_matches_p (p, split, &next) && next < best)
            {
              best = next;
              sched_data.packet = p;
              sched_data.split = split;
            }
        }
      if (best == 6)
        abort ();
    }

  if (*pn_ready > 0)
    {
      int more = ia64_internal_sched_reorder (dump, sched_verbose,
                                              ready, pn_ready, 1,
                                              clock_var);
      if (more)
        return more;
      /* Did we schedule a stop?  If so, finish this cycle.  */
      if (sched_data.cur == sched_data.first_slot)
        return 0;
    }

  if (sched_verbose)
    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");

  cycle_end_fill_slots (sched_verbose ? dump : NULL);
  if (sched_verbose)
    dump_current_packet (dump);
  return 0;
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx insn;
     int can_issue_more ATTRIBUTE_UNUSED;
{
  enum attr_type t = ia64_safe_type (insn);

  if (sched_data.last_was_stop)
    {
      int t = sched_data.first_slot;
      if (t == 0)
        t = 3;
      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
      init_insn_group_barriers ();
      sched_data.last_was_stop = 0;
    }

  if (t == TYPE_UNKNOWN)
    {
      if (sched_verbose)
        fprintf (dump, "// Ignoring type %s\n", type_names[t]);
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        {
          /* This must be some kind of asm.  Clear the scheduling state.  */
          rotate_two_bundles (sched_verbose ? dump : NULL);
          if (ia64_final_schedule)
            group_barrier_needed_p (insn);
        }
      return 1;
    }

  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
     important state info.  Don't delete this test.  */
  if (ia64_final_schedule
      && group_barrier_needed_p (insn))
    abort ();

  sched_data.stopbit[sched_data.cur] = 0;
  sched_data.insns[sched_data.cur] = insn;
  sched_data.types[sched_data.cur] = t;

  sched_data.cur++;
  if (sched_verbose)
    fprintf (dump, "// Scheduling insn %d of type %s\n",
             INSN_UID (insn), type_names[t]);

  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
    {
      schedule_stop (sched_verbose ? dump : NULL);
      sched_data.last_was_stop = 1;
    }

  return 1;
}
/* Free data allocated by ia64_sched_init.  */

static void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  rotate_two_bundles (NULL);
  free (sched_types);
  free (sched_ready);
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info ()
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
        continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
          && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
        head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (bb->global_live_at_start, r))
          {
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == bb->end)
              bb->end = n;
            head = n;
          }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = bb->head;

      while (1)
        {
          if (GET_CODE (insn) == CALL_INSN
              && GET_CODE (PATTERN (insn)) == COND_EXEC
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
            {
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
              if (bb->head == insn)
                bb->head = b;
              if (bb->end == insn)
                bb->end = a;
            }

          if (insn == bb->end)
            break;
          insn = NEXT_INSN (insn);
        }
    }
}
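/* For reference: the insns emitted above print as assembler pseudo-ops,
   e.g. `.pred.rel.mutex' for gen_pred_rel_mutex and the
   `.pred.safe_across_calls' family for gen_safe_across_calls_all and
   gen_safe_across_calls_normal; see the corresponding output templates
   in ia64.md for the exact spelling.  */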
/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}
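/* Background note: an IA-64 bundle has three slots, and each slot of a
   given template accepts only one unit type (an MFI bundle takes an M-,
   an F- and an I-unit insn, in that order), which is why the filler
   NOPs above must be generated per-type.  An L-type nop is never needed
   because L slots only occur as the first half of two-slot L+X insns
   such as movl.  */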
/* After the last scheduling pass, fill in NOPs.  It's easier to do this
   here than while scheduling.  */

static void
ia64_emit_nops ()
{
  rtx insn;
  const struct bundle *b = 0;
  int bundle_pos = 0;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx pat;
      enum attr_type t;
      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
        continue;
      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
          || GET_CODE (insn) == CODE_LABEL)
        {
          if (b)
            while (bundle_pos < 3)
              {
                emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                bundle_pos++;
              }
          if (GET_CODE (insn) != CODE_LABEL)
            b = bundle + INTVAL (XVECEXP (pat, 0, 0));
          else
            b = 0;
          bundle_pos = 0;
          continue;
        }
      else if (GET_CODE (pat) == UNSPEC_VOLATILE
               && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          int t = INTVAL (XVECEXP (pat, 0, 0));
          if (b)
            while (bundle_pos < t)
              {
                emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                bundle_pos++;
              }
          continue;
        }

      if (bundle_pos == 3)
        b = 0;

      if (b && INSN_P (insn))
        {
          t = ia64_safe_type (insn);
          if (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)
            {
              while (bundle_pos < 3)
                {
                  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                  bundle_pos++;
                }
              continue;
            }

          if (t == TYPE_UNKNOWN)
            continue;
          while (bundle_pos < 3)
            {
              if (t == b->t[bundle_pos]
                  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
                                      || b->t[bundle_pos] == TYPE_I)))
                break;

              emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
              bundle_pos++;
            }
          if (bundle_pos < 3)
            bundle_pos++;
        }
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  /* ??? update_life_info_in_dirty_blocks fails to terminate during
     non-optimizing bootstrap.  */
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;
      schedule_ebbs (rtl_dump_file);
      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);

      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
         place as they were during scheduling.  */
      emit_insn_group_barriers (rtl_dump_file, insns);
      ia64_emit_nops ();
    }
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);

  /* A call must not be the last instruction in a function, so that the
     return address remains within the function and unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      if (GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
          && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          saw_stop = 1;
          insn = prev_active_insn (insn);
        }
      if (GET_CODE (insn) == CALL_INSN)
        {
          if (! saw_stop)
            emit_insn (gen_insn_group_barrier (GEN_INT (3)));
          emit_insn (gen_break_f ());
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));
        }
    }

  fixup_errata ();
  emit_predicate_relation_info ();
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  switch (regno)
    {
    case GR_REG (1):
      /* When a function makes a call through a function descriptor, we
         will write a (potentially) new value to "gp".  After returning
         from such a call, we need to make sure the function restores the
         original gp-value, even if the function itself does not use the
         gp anymore.  */
      return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case BR_REG (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
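/* Illustration of the syscall_linkage case above (hypothetical
   declaration, not part of this file):

     extern long sys_entry (long, long)
          __attribute__ ((syscall_linkage));

   With this attribute, in0-in7 stay live at every exit, so an
   interrupted system call can be restarted with its original argument
   registers intact.  */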
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (regno)
     int regno;
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer-added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support our own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8 makes the code bigger, but should make the code
   faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (exp)
     tree exp;
{
  if (TARGET_NO_SDATA)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
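/* For reference, the payoff from ia64_in_small_data_p: a variable placed
   in .sdata/.sbss can be addressed gp-relative in a single add, roughly

     addl r14 = @gprel(var), gp
     ;;
     ld8 r15 = [r14]

   instead of materializing a full 64-bit address with movl first.
   (Sketch only; the register choices are arbitrary.)  */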
static void
ia64_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  const char *symbol_str;
  bool is_local;
  rtx symbol;
  char encoding = 0;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol = XEXP (DECL_RTL (decl), 0);
  symbol_str = XSTR (symbol, 0);

  is_local = (*targetm.binds_local_p) (decl);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    encoding = " GLil"[decl_tls_model (decl)];
  /* Determine if DECL will wind up in .sdata/.sbss.  */
  else if (is_local && ia64_in_small_data_p (decl))
    encoding = 's';

  /* Finally, encode this into the symbol string.  */
  if (encoding)
    {
      char *newstr;
      size_t len;

      if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
        {
          if (encoding == symbol_str[1])
            return;
          /* ??? Sdata became thread or thread became not thread.  Lose.  */
          abort ();
        }

      len = strlen (symbol_str);
      newstr = alloca (len + 3);
      newstr[0] = ENCODE_SECTION_INFO_CHAR;
      newstr[1] = encoding;
      memcpy (newstr + 2, symbol_str, len + 1);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
    }

  /* This decl is marked as being in small data/bss but it shouldn't be;
     one likely explanation for this is that the decl has been moved into
     a different section from the one it was in when encode_section_info
     was first called.  Remove the encoding.  */
  else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
    XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
}

static const char *
ia64_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == ENCODE_SECTION_INFO_CHAR)
    str += 2;
  if (str[0] == '*')
    str++;
  return str;
}
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
         shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
        abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
               ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);
          if (op0 == dest && GET_CODE (op1) == CONST_INT)
            {
              if (INTVAL (op1) < 0)
                {
                  fputs ("\t.fframe ", asm_out_file);
                  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
                           -INTVAL (op1));
                  fputc ('\n', asm_out_file);
                }
              else
                process_epilogue ();
            }
          else
            abort ();
        }
      else if (GET_CODE (src) == REG
               && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
        process_epilogue ();
      else
        abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
        {
        case BR_REG (0):
          /* Saving return address pointer.  */
          if (dest_regno != current_frame_info.reg_save_b0)
            abort ();
          fprintf (asm_out_file, "\t.save rp, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case PR_REG (0):
          if (dest_regno != current_frame_info.reg_save_pr)
            abort ();
          fprintf (asm_out_file, "\t.save pr, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_UNAT_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_unat)
            abort ();
          fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case AR_LC_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_lc)
            abort ();
          fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        case STACK_POINTER_REGNUM:
          if (dest_regno != HARD_FRAME_POINTER_REGNUM
              || ! frame_pointer_needed)
            abort ();
          fprintf (asm_out_file, "\t.vframe r%d\n",
                   ia64_dbx_register_number (dest_regno));
          return 1;

        default:
          /* Everything else should indicate being stored to memory.  */
          abort ();
        }
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
        {
          base = XEXP (dest, 0);
          off = 0;
        }
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
        {
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
        }
      else
        abort ();

      if (base == hard_frame_pointer_rtx)
        {
          saveop = ".savepsp";
          off = - off;
        }
      else if (base == stack_pointer_rtx)
        saveop = ".savesp";
      else
        abort ();

      src_regno = REGNO (src);
      switch (src_regno)
        {
        case BR_REG (0):
          if (current_frame_info.reg_save_b0 != 0)
            abort ();
          fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
          return 1;

        case PR_REG (0):
          if (current_frame_info.reg_save_pr != 0)
            abort ();
          fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
          return 1;

        case AR_LC_REGNUM:
          if (current_frame_info.reg_save_ar_lc != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
          return 1;

        case AR_PFS_REGNUM:
          if (current_frame_info.reg_save_ar_pfs != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
          return 1;

        case AR_UNAT_REGNUM:
          if (current_frame_info.reg_save_ar_unat != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
          return 1;

        case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
          fprintf (asm_out_file, "\t.save.g 0x%x\n",
                   1 << (src_regno - GR_REG (4)));
          return 1;

        case BR_REG (1): case BR_REG (2): case BR_REG (3):
        case BR_REG (4): case BR_REG (5):
          fprintf (asm_out_file, "\t.save.b 0x%x\n",
                   1 << (src_regno - BR_REG (1)));
          return 1;

        case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
          fprintf (asm_out_file, "\t.save.f 0x%x\n",
                   1 << (src_regno - FR_REG (2)));
          return 1;

        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                   1 << (src_regno - FR_REG (12)));
          return 1;

        default:
          return 0;
        }
    }

  return 0;
}
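/* For reference, a typical frame-related prologue run through process_set
   produces a directive sequence along these lines (sketch; registers and
   offsets vary):

     .fframe 32              // sp = sp - 32
     .save ar.pfs, r34       // alloc r34 = ar.pfs, ...
     .save rp, r33           // mov r33 = b0
     .savesp pr, 16          // predicate copy stored at sp + 16

   The unwinder uses these to recover the CFA and the saved registers.  */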
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
            {
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state 1\n");
              need_copy_state = false;
            }
        }

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
        return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
        pat = XEXP (pat, 0);
      else
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
        {
        case SET:
          process_set (asm_out_file, pat);
          break;

        case PARALLEL:
          {
            int par_index;
            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
              {
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x);
              }
            break;
          }

        default:
          abort ();
        }
    }
}
static void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                long_integer_type_node, NULL_TREE);

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node, NULL_TREE);

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                NULL_TREE);

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);
}
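/* Example use of the builtins registered above (hypothetical user code,
   not part of this file):

     static int lock;

     void acquire (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1))
         continue;
     }

     void release (void)
     {
       __sync_lock_release_si (&lock);
     }
*/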
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
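/* Two notes on the expansion above.  The fetchadd fast path applies only
   when VALUE satisfies fetchadd_operand, because the hardware fetchadd4
   and fetchadd8 instructions accept just the increments -16, -8, -4, -1,
   1, 4, 8 and 16.  The general path implements, in C terms:

     old = *ptr;
     do {
       ret = old;
       old = cmpxchg (ptr, old, old OP value);    // pseudo-helper
     } while (old != ret);
     return ret;                                  // pre-update value
*/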
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
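/* Note the contrast with ia64_expand_fetch_and_op: that routine returns
   the value that was in memory before the update, while this one returns
   the freshly computed value, matching the __sync_*_and_fetch builtins.  */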
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
/* On HP-UX, IA64 aggregate parameters are passed in the most significant
   bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return ((mode == BLKmode
           ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
              && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
           : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
          ? downward : upward);
}
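/* Worked example: a 3-byte struct takes the early `upward' return above,
   while a 2-byte scalar falls through to the standard computation and
   pads `downward', i.e. it is passed in the most significant end of its
   slot.  */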
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next; /* next external */
  char *name; /* name of the external */
} *extern_func_head = 0;

static void
ia64_hpux_add_extern_decl (name)
     const char *name;
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

void
ia64_hpux_asm_file_end (file)
     FILE *file;
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
          || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
        {
          if (decl)
            TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
          fprintf (file, "%s", TYPE_ASM_OP);
          assemble_name (file, extern_func_head->name);
          putc (',', file);
          fprintf (file, TYPE_OPERAND_FMT, "function");
          putc ('\n', file);
        }

      extern_func_head = extern_func_head->next;
    }
}
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX.
   Pretend flag_pic is always set.  */

static void
ia64_aix_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_elf_select_section (exp, reloc, align);
  flag_pic = save_pic;
}

static void
ia64_aix_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_unique_section (decl, reloc);
  flag_pic = save_pic;
}

static void
ia64_aix_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx this, insn, funexp;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NULL, NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (!CONST_OK_FOR_J (vcall_offset))
        {
          rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
          emit_move_insn (tmp2, vcall_offset_rtx);
          vcall_offset_rtx = tmp2;
        }
      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  emit_all_insn_group_barriers (NULL, insn);
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();
}
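/* Worked example of the output (sketch): for a thunk with, say,
   delta == -8 and no vcall offset, the emitted body is essentially an
   `adds in0 = -8, in0' to adjust `this' followed by a sibling branch to
   the target function; no frame is created.  */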
#include "gt-ia64.h"