/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
const struct attribute_spec ia64_attribute_table[];
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
static rtx ia64_cycle_display PARAMS ((int, rtx));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
#undef TARGET_SCHED_CYCLE_DISPLAY
#define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display

struct gcc_target targetm = TARGET_INITIALIZER;
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
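/* Worked example for the masking above (illustrative): sym+0x4000 passes,
   since 0x4000 & 0x3fff == 0, and may get a GOT entry; sym+0x4008 fails,
   since 0x4008 & 0x3fff == 0x8, and is instead handled by splitting the
   offset when the address load is expanded, so that nearby references can
   share one GOT entry for sym+0x4000.  */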
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */
/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}
/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
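/* Illustrative numbers for the K/L conjunction above, assuming the usual
   ia64.h constraint definitions (K accepts -128..127, L accepts -127..128):
   the conjunction accepts -127..127.  E.g. -128 is rejected here even
   though it satisfies K, because a later condition reversal may need the
   adjusted form of the same constant.  */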
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}
/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}
/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}
/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}
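/* The values above are exactly the increments the fetchadd instruction can
   encode; for illustration (hypothetical operands):

	fetchadd8.acq r8 = [r32], 16
	fetchadd4.rel r9 = [r33], -1

   An increment outside this set, say 2, cannot use fetchadd and is instead
   expanded through a compare-and-swap loop (cf. ia64_expand_fetch_and_op,
   declared above).  */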
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}
/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}
/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}
/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}
/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}
/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}
/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}
/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}
/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}
/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}
/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}
/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
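/* Worked example (illustrative): for rop = 0x3f0 and rshift = 4,
   op = 0x3f0 >> 4 = 0x3f, and exact_log2 (0x3f + 1) = 6, so the mask
   describes a 6-bit field deposited at bit 4.  A mask with a hole,
   e.g. 0x2f0, gives 0x2f + 1 = 0x30, which is not a power of two, so
   exact_log2 returns -1 and the combination is rejected.  */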
/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}
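/* Worked example for the 14-bit offset split above (illustrative): for
   ofs = 0x2345, lo = ((0x2345 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1cbb and
   hi = 0x2345 - lo = 0x4000.  hi has its low 14 bits clear, so sym+hi can
   share a GOT entry with neighboring references, while lo fits the 14-bit
   signed immediate range [-0x2000, 0x1fff] of the final add.  */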
rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
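/* For illustration: the BImode result lives in a predicate register, so a
   conditional branch built from the value returned here typically ends up
   as something like (hypothetical registers and label)

	cmp.eq p6, p7 = r14, r15
	(p6) br.cond.dptk .Ltarget

   where the (code, cmp, 0) expression selects which predicate polarity the
   branch tests.  */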
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
	emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
	insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_pic (dest, narg_rtx, b0);
      else
	insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
	emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
      else
	{
	  emit_move_insn (gp_save, pic_offset_table_rtx);

	  if (! retval)
	    insn = gen_call_pic (addr, narg_rtx, b0);
	  else
	    insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
	  emit_call_insn (insn);

	  emit_move_insn (pic_offset_table_rtx, gp_save);
	}
    }
}
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
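/* For illustration, a function treating p1-p5 and p16-p63 as preserved
   across calls would get a directive like (hypothetical ranges)

	.pred.safe_across_calls p1-p5,p16-p63

   emitted at the top of the assembly file.  */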
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
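/* Worked example (illustrative): a non-leaf function with 24 bytes of
   locals (SIZE = 24), two call-saved GRs spilled (spill_size = 16), no
   extra spills, no pretend args, and no outgoing args gets
   total_size = IA64_STACK_ALIGN (16 + 0 + 24 + 0 + 0) = 48, since
   IA64_STACK_ALIGN rounds up to a 16-byte multiple.  A leaf function with
   the same numbers would drop the caller-provided 16-byte scratch area,
   giving 32.  */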
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
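/* Worked example (illustrative): with total_size = 48 and 8 bytes of
   pretend args, eliminating ARG_POINTER to STACK_POINTER yields
   48 + 16 - 8 = 56, i.e. incoming arguments sit 56 bytes above the
   adjusted stack pointer.  */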
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}
static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = gen_sequence ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					     REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
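/* For illustration: with four consecutive 8-byte saves at CFA offsets
   N, N-8, N-16, N-24, the saves alternate between the two iterator
   registers, so each iterator advances by 16 through a POST_MODIFY
   address and two st8 instructions can land in the same insn group.  */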
static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}
static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue (), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P (insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;
  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }
  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);
  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }
2193 /* Handle AR regs in numerical order. All of them get special handling. */
2194 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2195 && current_frame_info.reg_save_ar_unat == 0)
2197 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2198 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2202 /* The alloc insn already copied ar.pfs into a general register. The
2203 only thing we have to do now is copy that register to a stack slot
2204 if we haven't allocated a local register for the job. */
2205 if (current_frame_info.reg_save_ar_pfs == 0
2206 && ! current_function_is_leaf)
2208 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2209 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2213 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2215 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2216 if (current_frame_info.reg_save_ar_lc != 0)
2218 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2219 insn = emit_move_insn (alt_reg, reg);
2220 RTX_FRAME_RELATED_P (insn) = 1;
2222 /* Even if we're not going to generate an epilogue, we still
2223 need to save the register so that EH works. */
2225 emit_insn (gen_prologue_use (alt_reg));
2229 alt_regno = next_scratch_gr_reg ();
2230 alt_reg = gen_rtx_REG (DImode, alt_regno);
2231 emit_move_insn (alt_reg, reg);
2232 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2237 /* We should now be at the base of the gr/br/fr spill area. */
2238 if (cfa_off != (current_frame_info.spill_cfa_off
2239 + current_frame_info.spill_size))
2242 /* Spill all general registers. */
2243 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2244 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2246 reg = gen_rtx_REG (DImode, regno);
2247 do_spill (gen_gr_spill, reg, cfa_off, reg);
2251 /* Handle BR0 specially -- it may be getting stored permanently in
2252 some GR register. */
2253 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2255 reg = gen_rtx_REG (DImode, BR_REG (0));
2256 if (current_frame_info.reg_save_b0 != 0)
2258 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2259 insn = emit_move_insn (alt_reg, reg);
2260 RTX_FRAME_RELATED_P (insn) = 1;
2262 /* Even if we're not going to generate an epilogue, we still
2263 need to save the register so that EH works. */
2265 emit_insn (gen_prologue_use (alt_reg));
2269 alt_regno = next_scratch_gr_reg ();
2270 alt_reg = gen_rtx_REG (DImode, alt_regno);
2271 emit_move_insn (alt_reg, reg);
2272 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2277 /* Spill the rest of the BR registers. */
2278 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2279 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2281 alt_regno = next_scratch_gr_reg ();
2282 alt_reg = gen_rtx_REG (DImode, alt_regno);
2283 reg = gen_rtx_REG (DImode, regno);
2284 emit_move_insn (alt_reg, reg);
2285 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2289 /* Align the frame and spill all FR registers. */
2290 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2291 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2295 reg = gen_rtx_REG (TFmode, regno);
2296 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2300 if (cfa_off != current_frame_info.spill_cfa_off)
2303 finish_spill_pointers ();
2306 /* Called after register allocation to add any instructions needed for the
2307 epilogue. Using an epilogue insn is favored compared to putting all of the
2308 instructions in output_function_epilogue(), since it allows the scheduler
2309 to intermix instructions with the restores of the caller saved registers. In
2310 some cases, it might be necessary to emit a barrier instruction as the last
2311 insn to prevent such scheduling. */
2314 ia64_expand_epilogue (sibcall_p)
2317 rtx insn, reg, alt_reg, ar_unat_save_reg;
2318 int regno, alt_regno, cfa_off;
2320 ia64_compute_frame_size (get_frame_size ());
2322 /* If there is a frame pointer, then we use it instead of the stack
2323 pointer, so that the stack pointer does not need to be valid when
2324 the epilogue starts. See EXIT_IGNORE_STACK. */
2325 if (frame_pointer_needed)
2326 setup_spill_pointers (current_frame_info.n_spilled,
2327 hard_frame_pointer_rtx, 0);
2329 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2330 current_frame_info.total_size);
2332 if (current_frame_info.total_size != 0)
2334 /* ??? At this point we must generate a magic insn that appears to
2335 modify the spill iterators and the frame pointer. This would
2336 allow the most scheduling freedom. For now, just hard stop. */
2337 emit_insn (gen_blockage ());
2340 /* Locate the bottom of the register save area. */
2341 cfa_off = (current_frame_info.spill_cfa_off
2342 + current_frame_info.spill_size
2343 + current_frame_info.extra_spill_size);
2345 /* Restore the predicate registers. */
2346 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2348 if (current_frame_info.reg_save_pr != 0)
2349 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2352 alt_regno = next_scratch_gr_reg ();
2353 alt_reg = gen_rtx_REG (DImode, alt_regno);
2354 do_restore (gen_movdi_x, alt_reg, cfa_off);
2357 reg = gen_rtx_REG (DImode, PR_REG (0));
2358 emit_move_insn (reg, alt_reg);
2361 /* Restore the application registers. */
2363 /* Load the saved unat from the stack, but do not restore it until
2364 after the GRs have been restored. */
2365 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2367 if (current_frame_info.reg_save_ar_unat != 0)
2369 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2372 alt_regno = next_scratch_gr_reg ();
2373 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2374 current_frame_info.gr_used_mask |= 1 << alt_regno;
2375 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2380 ar_unat_save_reg = NULL_RTX;
2382 if (current_frame_info.reg_save_ar_pfs != 0)
2384 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2385 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2386 emit_move_insn (reg, alt_reg);
2388 else if (! current_function_is_leaf)
2390 alt_regno = next_scratch_gr_reg ();
2391 alt_reg = gen_rtx_REG (DImode, alt_regno);
2392 do_restore (gen_movdi_x, alt_reg, cfa_off);
2394 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2395 emit_move_insn (reg, alt_reg);
2398 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2400 if (current_frame_info.reg_save_ar_lc != 0)
2401 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2404 alt_regno = next_scratch_gr_reg ();
2405 alt_reg = gen_rtx_REG (DImode, alt_regno);
2406 do_restore (gen_movdi_x, alt_reg, cfa_off);
2409 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2410 emit_move_insn (reg, alt_reg);
2413 /* We should now be at the base of the gr/br/fr spill area. */
2414 if (cfa_off != (current_frame_info.spill_cfa_off
2415 + current_frame_info.spill_size))
2418 /* Restore all general registers. */
2419 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2420 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2422 reg = gen_rtx_REG (DImode, regno);
2423 do_restore (gen_gr_restore, reg, cfa_off);
2427 /* Restore the branch registers. Handle B0 specially, as it may
2428 have gotten stored in some GR register. */
2429 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2431 if (current_frame_info.reg_save_b0 != 0)
2432 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2435 alt_regno = next_scratch_gr_reg ();
2436 alt_reg = gen_rtx_REG (DImode, alt_regno);
2437 do_restore (gen_movdi_x, alt_reg, cfa_off);
2440 reg = gen_rtx_REG (DImode, BR_REG (0));
2441 emit_move_insn (reg, alt_reg);
2444 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2445 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2447 alt_regno = next_scratch_gr_reg ();
2448 alt_reg = gen_rtx_REG (DImode, alt_regno);
2449 do_restore (gen_movdi_x, alt_reg, cfa_off);
2451 reg = gen_rtx_REG (DImode, regno);
2452 emit_move_insn (reg, alt_reg);
2455 /* Restore floating point registers. */
2456 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2457 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2461 reg = gen_rtx_REG (TFmode, regno);
2462 do_restore (gen_fr_restore_x, reg, cfa_off);
2466 /* Restore ar.unat for real. */
2467 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2469 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2470 emit_move_insn (reg, ar_unat_save_reg);
2473 if (cfa_off != current_frame_info.spill_cfa_off)
2476 finish_spill_pointers ();
2478 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2480 /* ??? At this point we must generate a magic insn that appears to
2481 modify the spill iterators, the stack pointer, and the frame
2482 pointer. This would allow the most scheduling freedom. For now, just hard stop. */
2484 emit_insn (gen_blockage ());
2487 if (cfun->machine->ia64_eh_epilogue_sp)
2488 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2489 else if (frame_pointer_needed)
2491 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2492 RTX_FRAME_RELATED_P (insn) = 1;
2494 else if (current_frame_info.total_size)
2496 rtx offset, frame_size_rtx;
2498 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2499 if (CONST_OK_FOR_I (current_frame_info.total_size))
2500 offset = frame_size_rtx;
2503 regno = next_scratch_gr_reg ();
2504 offset = gen_rtx_REG (DImode, regno);
2505 emit_move_insn (offset, frame_size_rtx);
2508 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2511 RTX_FRAME_RELATED_P (insn) = 1;
2512 if (GET_CODE (offset) != CONST_INT)
2515 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2516 gen_rtx_SET (VOIDmode,
2518 gen_rtx_PLUS (DImode,
2525 if (cfun->machine->ia64_eh_epilogue_bsp)
2526 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2529 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2532 int fp = GR_REG (2);
2533 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2534 first available call-clobbered register. If there was a frame pointer
2535 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2536 so we have to make sure we're using the string "r2" when emitting
2537 the register name for the assembler. */
2538 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2539 fp = HARD_FRAME_POINTER_REGNUM;
2541 /* We must emit an alloc to force the input registers to become output
2542 registers. Otherwise, if the callee tries to pass its parameters
2543 through to another call without an intervening alloc, then these values get lost. */
2545 /* ??? We don't need to preserve all input registers. We only need to
2546 preserve those input registers used as arguments to the sibling call.
2547 It is unclear how to compute that number here. */
2548 if (current_frame_info.n_input_regs != 0)
2549 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2550 GEN_INT (0), GEN_INT (0),
2551 GEN_INT (current_frame_info.n_input_regs),
2556 /* Return 1 if br.ret can do all the work required to return from a function. */
2560 ia64_direct_return ()
2562 if (reload_completed && ! frame_pointer_needed)
2564 ia64_compute_frame_size (get_frame_size ());
2566 return (current_frame_info.total_size == 0
2567 && current_frame_info.n_spilled == 0
2568 && current_frame_info.reg_save_b0 == 0
2569 && current_frame_info.reg_save_pr == 0
2570 && current_frame_info.reg_save_ar_pfs == 0
2571 && current_frame_info.reg_save_ar_unat == 0
2572 && current_frame_info.reg_save_ar_lc == 0);
2578 ia64_hard_regno_rename_ok (from, to)
2582 /* Don't clobber any of the registers we reserved for the prologue. */
2583 if (to == current_frame_info.reg_fp
2584 || to == current_frame_info.reg_save_b0
2585 || to == current_frame_info.reg_save_pr
2586 || to == current_frame_info.reg_save_ar_pfs
2587 || to == current_frame_info.reg_save_ar_unat
2588 || to == current_frame_info.reg_save_ar_lc)
2591 if (from == current_frame_info.reg_fp
2592 || from == current_frame_info.reg_save_b0
2593 || from == current_frame_info.reg_save_pr
2594 || from == current_frame_info.reg_save_ar_pfs
2595 || from == current_frame_info.reg_save_ar_unat
2596 || from == current_frame_info.reg_save_ar_lc)
2599 /* Don't use output registers outside the register frame. */
2600 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2603 /* Retain even/oddness on predicate register pairs. */
2604 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2605 return (from & 1) == (to & 1);
2607 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2608 if (from == GR_REG (4) && current_function_calls_setjmp)
2614 /* Target hook for assembling integer objects. Handle word-sized
2615 aligned objects and detect the cases when @fptr is needed. */
2618 ia64_assemble_integer (x, size, aligned_p)
2623 if (size == UNITS_PER_WORD && aligned_p
2624 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2625 && GET_CODE (x) == SYMBOL_REF
2626 && SYMBOL_REF_FLAG (x))
2628 fputs ("\tdata8\t@fptr(", asm_out_file);
2629 output_addr_const (asm_out_file, x);
2630 fputs (")\n", asm_out_file);
2633 return default_assemble_integer (x, size, aligned_p);
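/* Illustrative only: a word-sized, aligned initializer whose value is the
   address of a function foo is emitted by the code above as

	data8	@fptr(foo)

   so that the linker materializes an official function descriptor.  */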
2636 /* Emit the function prologue. */
2639 ia64_output_function_prologue (file, size)
2641 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2643 int mask, grsave, grsave_prev;
2645 if (current_frame_info.need_regstk)
2646 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2647 current_frame_info.n_input_regs,
2648 current_frame_info.n_local_regs,
2649 current_frame_info.n_output_regs,
2650 current_frame_info.n_rotate_regs);
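  /* For the frame used in the alloc example above, this prints e.g.

	.regstk 2, 3, 4, 0

     (purely illustrative numbers).  */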
2652 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2655 /* Emit the .prologue directive. */
2658 grsave = grsave_prev = 0;
2659 if (current_frame_info.reg_save_b0 != 0)
2662 grsave = grsave_prev = current_frame_info.reg_save_b0;
2664 if (current_frame_info.reg_save_ar_pfs != 0
2665 && (grsave_prev == 0
2666 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2669 if (grsave_prev == 0)
2670 grsave = current_frame_info.reg_save_ar_pfs;
2671 grsave_prev = current_frame_info.reg_save_ar_pfs;
2673 if (current_frame_info.reg_fp != 0
2674 && (grsave_prev == 0
2675 || current_frame_info.reg_fp == grsave_prev + 1))
2678 if (grsave_prev == 0)
2679 grsave = HARD_FRAME_POINTER_REGNUM;
2680 grsave_prev = current_frame_info.reg_fp;
2682 if (current_frame_info.reg_save_pr != 0
2683 && (grsave_prev == 0
2684 || current_frame_info.reg_save_pr == grsave_prev + 1))
2687 if (grsave_prev == 0)
2688 grsave = current_frame_info.reg_save_pr;
2692 fprintf (file, "\t.prologue %d, %d\n", mask,
2693 ia64_dbx_register_number (grsave));
2695 fputs ("\t.prologue\n", file);
2697 /* Emit a .spill directive, if necessary, to relocate the base of
2698 the register spill area. */
2699 if (current_frame_info.spill_cfa_off != -16)
2700 fprintf (file, "\t.spill %ld\n",
2701 (long) (current_frame_info.spill_cfa_off
2702 + current_frame_info.spill_size));
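  /* Illustrative only: with spill_cfa_off == -32 and spill_size == 48,
     this emits ".spill 16".  */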
2705 /* Emit the .body directive at the scheduled end of the prologue. */
2708 ia64_output_function_end_prologue (file)
2711 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2714 fputs ("\t.body\n", file);
2717 /* Emit the function epilogue. */
2720 ia64_output_function_epilogue (file, size)
2721 FILE *file ATTRIBUTE_UNUSED;
2722 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2726 /* Reset from the function's potential modifications. */
2727 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2729 if (current_frame_info.reg_fp)
2731 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2732 reg_names[HARD_FRAME_POINTER_REGNUM]
2733 = reg_names[current_frame_info.reg_fp];
2734 reg_names[current_frame_info.reg_fp] = tmp;
2736 if (! TARGET_REG_NAMES)
2738 for (i = 0; i < current_frame_info.n_input_regs; i++)
2739 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2740 for (i = 0; i < current_frame_info.n_local_regs; i++)
2741 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2742 for (i = 0; i < current_frame_info.n_output_regs; i++)
2743 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2746 current_frame_info.initialized = 0;
2750 ia64_dbx_register_number (regno)
2753 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2754 from its home at loc79 to something inside the register frame. We
2755 must perform the same renumbering here for the debug info. */
2756 if (current_frame_info.reg_fp)
2758 if (regno == HARD_FRAME_POINTER_REGNUM)
2759 regno = current_frame_info.reg_fp;
2760 else if (regno == current_frame_info.reg_fp)
2761 regno = HARD_FRAME_POINTER_REGNUM;
2764 if (IN_REGNO_P (regno))
2765 return 32 + regno - IN_REG (0);
2766 else if (LOC_REGNO_P (regno))
2767 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2768 else if (OUT_REGNO_P (regno))
2769 return (32 + current_frame_info.n_input_regs
2770 + current_frame_info.n_local_regs + regno - OUT_REG (0));
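  /* Worked example (assumed frame): with 2 input and 3 local registers,
     in1 maps to 33, loc2 to 32 + 2 + 2 == 36, and out0 to
     32 + 2 + 3 == 37.  */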
2776 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2777 rtx addr, fnaddr, static_chain;
2779 rtx addr_reg, eight = GEN_INT (8);
2781 /* Load up our iterator. */
2782 addr_reg = gen_reg_rtx (Pmode);
2783 emit_move_insn (addr_reg, addr);
2785 /* The first two words are the fake descriptor:
2786 __ia64_trampoline, ADDR+16. */
2787 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2788 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2789 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2791 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2792 copy_to_reg (plus_constant (addr, 16)));
2793 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2795 /* The third word is the target descriptor. */
2796 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2797 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2799 /* The fourth word is the static chain. */
2800 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
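  /* The resulting trampoline layout, as built above:

	ADDR +  0:  __ia64_trampoline  \  fake function
	ADDR +  8:  ADDR + 16          /  descriptor
	ADDR + 16:  fnaddr (the target's real descriptor)
	ADDR + 24:  static_chain  */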
2803 /* Do any needed setup for a variadic function. CUM has not been updated
2804 for the last named argument which has type TYPE and mode MODE.
2806 We generate the actual spill instructions during prologue generation. */
2809 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2810 CUMULATIVE_ARGS cum;
2814 int second_time ATTRIBUTE_UNUSED;
2816 /* If this is a stdarg function, then skip the current argument. */
2817 if (! current_function_varargs)
2818 ia64_function_arg_advance (&cum, int_mode, type, 1);
2820 if (cum.words < MAX_ARGUMENT_SLOTS)
2822 int n = MAX_ARGUMENT_SLOTS - cum.words;
2823 *pretend_size = n * UNITS_PER_WORD;
2824 cfun->machine->n_varargs = n;
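  /* Worked example (assumed signature): for "int f (int a, int b, ...)"
     cum.words is 2 after the advance above, so n == 8 - 2 == 6,
     *pretend_size == 48 bytes, and six argument registers get spilled
     by the prologue.  */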
2828 /* Check whether TYPE is a homogeneous floating point aggregate. If
2829 it is, return the mode of the floating point type that appears
2830 in all leaves. If it is not, return VOIDmode.
2832 An aggregate is a homogeneous floating point aggregate if all
2833 fields/elements in it have the same floating point type (e.g.,
2834 SFmode). 128-bit quad-precision floats are excluded. */
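/* For example (illustrative only):

     struct rgb { float r, g, b; };      -- HFA, element mode SFmode
     struct pt  { double x, y; };        -- HFA, element mode DFmode
     struct mix { float f; double d; };  -- not an HFA (mixed modes)  */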
2836 static enum machine_mode
2837 hfa_element_mode (type, nested)
2841 enum machine_mode element_mode = VOIDmode;
2842 enum machine_mode mode;
2843 enum tree_code code = TREE_CODE (type);
2844 int know_element_mode = 0;
2849 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2850 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2851 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2852 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2856 /* Fortran complex types are supposed to be HFAs, so we need to handle
2857 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex types. */
2860 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2861 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2862 * BITS_PER_UNIT, MODE_FLOAT, 0);
2867 /* ??? Should exclude 128-bit long double here. */
2868 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2869 mode if this is contained within an aggregate. */
2871 return TYPE_MODE (type);
2876 return TYPE_MODE (TREE_TYPE (type));
2880 case QUAL_UNION_TYPE:
2881 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2883 if (TREE_CODE (t) != FIELD_DECL)
2886 mode = hfa_element_mode (TREE_TYPE (t), 1);
2887 if (know_element_mode)
2889 if (mode != element_mode)
2892 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2896 know_element_mode = 1;
2897 element_mode = mode;
2900 return element_mode;
2903 /* If we reach here, we probably have some front-end specific type
2904 that the backend doesn't know about. This can happen via the
2905 aggregate_value_p call in init_function_start. All we can do is
2906 ignore unknown tree types. */
2913 /* Return rtx for register where argument is passed, or zero if it is passed on the stack. */
2916 /* ??? 128-bit quad-precision floats are always passed in general registers. */
2920 ia64_function_arg (cum, mode, type, named, incoming)
2921 CUMULATIVE_ARGS *cum;
2922 enum machine_mode mode;
2927 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2928 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2929 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2932 enum machine_mode hfa_mode = VOIDmode;
2934 /* Integer and float arguments larger than 8 bytes start at the next even
2935 boundary. Aggregates larger than 8 bytes start at the next even boundary
2936 if the aggregate has 16 byte alignment. Net effect is that types with
2937 alignment greater than 8 start at the next even boundary. */
2938 /* ??? The ABI does not specify how to handle aggregates with alignment from
2939 9 to 15 bytes, or greater than 16. We handle them all as if they had
2940 16 byte alignment. Such aggregates can occur only if gcc extensions are used. */
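  /* Illustrative only: a 16-byte-aligned aggregate arriving when
     cum->words is odd gets offset 1, so it starts in an even-numbered
     argument slot.  */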
2942 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2944 && (cum->words & 1))
2947 /* If all argument slots are used, then it must go on the stack. */
2948 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2951 /* Check for and handle homogeneous FP aggregates. */
2953 hfa_mode = hfa_element_mode (type, 0);
2955 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2956 and unprototyped hfas are passed specially. */
2957 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2961 int fp_regs = cum->fp_regs;
2962 int int_regs = cum->words + offset;
2963 int hfa_size = GET_MODE_SIZE (hfa_mode);
2967 /* If prototyped, pass it in FR regs then GR regs.
2968 If not prototyped, pass it in both FR and GR regs.
2970 If this is an SFmode aggregate, then it is possible to run out of
2971 FR regs while GR regs are still left. In that case, we pass the
2972 remaining part in the GR regs. */
2974 /* Fill the FP regs. We do this always. We stop if we reach the end
2975 of the argument, the last FP register, or the last argument slot. */
2977 byte_size = ((mode == BLKmode)
2978 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2979 args_byte_size = int_regs * UNITS_PER_WORD;
2981 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2982 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2984 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2985 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2989 args_byte_size += hfa_size;
2993 /* If no prototype, then the whole thing must go in GR regs. */
2994 if (! cum->prototype)
2996 /* If this is an SFmode aggregate, then we might have some left over
2997 that needs to go in GR regs. */
2998 else if (byte_size != offset)
2999 int_regs += offset / UNITS_PER_WORD;
3001 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3003 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3005 enum machine_mode gr_mode = DImode;
3007 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3008 then this goes in a GR reg left adjusted/little endian, right
3009 adjusted/big endian. */
3010 /* ??? Currently this is handled wrong, because 4-byte hunks are
3011 always right adjusted/little endian. */
3014 /* If we have an even 4 byte hunk because the aggregate is a
3015 multiple of 4 bytes in size, then this goes in a GR reg right
3016 adjusted/little endian. */
3017 else if (byte_size - offset == 4)
3019 /* Complex floats need to have float mode. */
3020 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3023 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3024 gen_rtx_REG (gr_mode, (basereg
3027 offset += GET_MODE_SIZE (gr_mode);
3028 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3029 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3032 /* If we ended up using just one location, just return that one loc. */
3034 return XEXP (loc[0], 0);
3036 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3039 /* Integral and aggregates go in general registers. If we have run out of
3040 FR registers, then FP values must also go in general registers. This can
3041 happen when we have a SFmode HFA. */
3042 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3043 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3044 return gen_rtx_REG (mode, basereg + cum->words + offset);
3046 /* If there is a prototype, then FP values go in a FR register when
3047 named, and in a GR register when unnamed. */
3048 else if (cum->prototype)
3051 return gen_rtx_REG (mode, basereg + cum->words + offset);
3053 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3055 /* If there is no prototype, then FP values go in both FR and GR registers. */
3059 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3060 gen_rtx_REG (mode, (FR_ARG_FIRST
3063 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3065 (basereg + cum->words
3069 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
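      /* Illustrative only: an unprototyped DFmode argument bound for
	 slot 0 comes back as a PARALLEL naming both FR_ARG_FIRST and the
	 first GR argument register, so the caller sets up both copies as
	 unprototyped calls require.  */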
3073 /* Return number of words, at the beginning of the argument, that must be
3074 put in registers. 0 if the argument is entirely in registers or entirely in memory. */
3078 ia64_function_arg_partial_nregs (cum, mode, type, named)
3079 CUMULATIVE_ARGS *cum;
3080 enum machine_mode mode;
3082 int named ATTRIBUTE_UNUSED;
3084 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3085 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3089 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3091 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3093 && (cum->words & 1))
3096 /* If all argument slots are used, then it must go on the stack. */
3097 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3100 /* It doesn't matter whether the argument goes in FR or GR regs. If
3101 it fits within the 8 argument slots, then it goes entirely in
3102 registers. If it extends past the last argument slot, then the rest
3103 goes on the stack. */
3105 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3108 return MAX_ARGUMENT_SLOTS - cum->words - offset;
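  /* Worked example: a 4-word argument with cum->words == 6 and offset 0
     overflows the 8 slots, so we return 8 - 6 - 0 == 2; two words travel
     in registers and the remaining two go on the stack.  */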
3111 /* Update CUM to point after this argument. This is patterned after
3112 ia64_function_arg. */
3115 ia64_function_arg_advance (cum, mode, type, named)
3116 CUMULATIVE_ARGS *cum;
3117 enum machine_mode mode;
3121 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3122 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3125 enum machine_mode hfa_mode = VOIDmode;
3127 /* If all arg slots are already full, then there is nothing to do. */
3128 if (cum->words >= MAX_ARGUMENT_SLOTS)
3131 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3133 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3135 && (cum->words & 1))
3138 cum->words += words + offset;
3140 /* Check for and handle homogeneous FP aggregates. */
3142 hfa_mode = hfa_element_mode (type, 0);
3144 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3145 and unprototyped hfas are passed specially. */
3146 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3148 int fp_regs = cum->fp_regs;
3149 /* This is the original value of cum->words + offset. */
3150 int int_regs = cum->words - words;
3151 int hfa_size = GET_MODE_SIZE (hfa_mode);
3155 /* If prototyped, pass it in FR regs then GR regs.
3156 If not prototyped, pass it in both FR and GR regs.
3158 If this is an SFmode aggregate, then it is possible to run out of
3159 FR regs while GR regs are still left. In that case, we pass the
3160 remaining part in the GR regs. */
3162 /* Fill the FP regs. We do this always. We stop if we reach the end
3163 of the argument, the last FP register, or the last argument slot. */
3165 byte_size = ((mode == BLKmode)
3166 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3167 args_byte_size = int_regs * UNITS_PER_WORD;
3169 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3170 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3173 args_byte_size += hfa_size;
3177 cum->fp_regs = fp_regs;
3180 /* Integral and aggregates go in general registers. If we have run out of
3181 FR registers, then FP values must also go in general registers. This can
3182 happen when we have a SFmode HFA. */
3183 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3186 /* If there is a prototype, then FP values go in a FR register when
3187 named, and in a GR register when unnamed. */
3188 else if (cum->prototype)
3193 /* ??? Complex types should not reach here. */
3194 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3196 /* If there is no prototype, then FP values go in both FR and GR registers. */
3199 /* ??? Complex types should not reach here. */
3200 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3205 /* Implement va_start. */
3208 ia64_va_start (stdarg_p, valist, nextarg)
3216 arg_words = current_function_args_info.words;
3221 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3223 nextarg = plus_constant (nextarg, ofs);
3224 std_expand_builtin_va_start (1, valist, nextarg);
3227 /* Implement va_arg. */
3230 ia64_va_arg (valist, type)
3235 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3237 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3239 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3240 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3241 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3242 build_int_2 (-2 * UNITS_PER_WORD, -1));
3243 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3244 TREE_SIDE_EFFECTS (t) = 1;
3245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3248 return std_expand_builtin_va_arg (valist, type);
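  /* With UNITS_PER_WORD == 8, the alignment code above computes
     valist = (valist + 15) & -16, i.e. it rounds valist up to the next
     16-byte boundary before taking the argument normally.  */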
3251 /* Return 1 if the function return value is returned in memory. Return 0 if it is in a register. */
3255 ia64_return_in_memory (valtype)
3258 enum machine_mode mode;
3259 enum machine_mode hfa_mode;
3260 HOST_WIDE_INT byte_size;
3262 mode = TYPE_MODE (valtype);
3263 byte_size = GET_MODE_SIZE (mode);
3264 if (mode == BLKmode)
3266 byte_size = int_size_in_bytes (valtype);
3271 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3273 hfa_mode = hfa_element_mode (valtype, 0);
3274 if (hfa_mode != VOIDmode)
3276 int hfa_size = GET_MODE_SIZE (hfa_mode);
3278 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3283 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
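  /* Illustrative only: an HFA of ten doubles (80 bytes, hfa_size 8) has
     byte_size / hfa_size == 10 > MAX_ARGUMENT_SLOTS and is returned in
     memory; eight or fewer elements come back in FP registers, per
     ia64_function_value below.  */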
3289 /* Return rtx for register that holds the function return value. */
3292 ia64_function_value (valtype, func)
3294 tree func ATTRIBUTE_UNUSED;
3296 enum machine_mode mode;
3297 enum machine_mode hfa_mode;
3299 mode = TYPE_MODE (valtype);
3300 hfa_mode = hfa_element_mode (valtype, 0);
3302 if (hfa_mode != VOIDmode)
3310 hfa_size = GET_MODE_SIZE (hfa_mode);
3311 byte_size = ((mode == BLKmode)
3312 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3314 for (i = 0; offset < byte_size; i++)
3316 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3317 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3323 return XEXP (loc[0], 0);
3325 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3327 else if (FLOAT_TYPE_P (valtype) &&
3328 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3329 return gen_rtx_REG (mode, FR_ARG_FIRST);
3331 return gen_rtx_REG (mode, GR_RET_FIRST);
3334 /* Print a memory address as an operand to reference that memory location. */
3336 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3337 also call this from ia64_print_operand for memory addresses. */
3340 ia64_print_operand_address (stream, address)
3341 FILE * stream ATTRIBUTE_UNUSED;
3342 rtx address ATTRIBUTE_UNUSED;
3346 /* Print an operand to an assembler instruction.
3347 C Swap and print a comparison operator.
3348 D Print an FP comparison operator.
3349 E Print 32 - constant, for SImode shifts as extract.
3350 e Print 64 - constant, for DImode rotates.
3351 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3352 a floating point register emitted normally.
3353 I Invert a predicate register by adding 1.
3354 J Select the proper predicate register for a condition.
3355 j Select the inverse predicate register for a condition.
3356 O Append .acq for volatile load.
3357 P Postincrement of a MEM.
3358 Q Append .rel for volatile store.
3359 S Shift amount for shladd instruction.
3360 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3361 for Intel assembler.
3362 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3363 for Intel assembler.
3364 r Print register name, or constant 0 as r0. HP compatibility for Linux kernel. */
3367 ia64_print_operand (file, x, code)
3377 /* Handled below. */
3382 enum rtx_code c = swap_condition (GET_CODE (x));
3383 fputs (GET_RTX_NAME (c), file);
3388 switch (GET_CODE (x))
3400 str = GET_RTX_NAME (GET_CODE (x));
3407 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3411 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
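	  /* Illustrative only: %E on (const_int 5) prints 27 (32 - 5),
	     and %e on (const_int 8) prints 56 (64 - 8), matching the
	     extract/rotate encodings described above.  */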
3415 if (x == CONST0_RTX (GET_MODE (x)))
3416 str = reg_names [FR_REG (0)];
3417 else if (x == CONST1_RTX (GET_MODE (x)))
3418 str = reg_names [FR_REG (1)];
3419 else if (GET_CODE (x) == REG)
3420 str = reg_names [REGNO (x)];
3427 fputs (reg_names [REGNO (x) + 1], file);
3433 unsigned int regno = REGNO (XEXP (x, 0));
3434 if (GET_CODE (x) == EQ)
3438 fputs (reg_names [regno], file);
3443 if (MEM_VOLATILE_P (x))
3444 fputs(".acq", file);
3449 HOST_WIDE_INT value;
3451 switch (GET_CODE (XEXP (x, 0)))
3457 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3458 if (GET_CODE (x) == CONST_INT)
3460 else if (GET_CODE (x) == REG)
3462 fprintf (file, ", %s", reg_names[REGNO (x)]);
3470 value = GET_MODE_SIZE (GET_MODE (x));
3474 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3480 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3485 if (MEM_VOLATILE_P (x))
3486 fputs(".rel", file);
3490 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3494 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3496 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3502 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3504 const char *prefix = "0x";
3505 if (INTVAL (x) & 0x80000000)
3507 fprintf (file, "0xffffffff");
3510 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3516 /* If this operand is the constant zero, write it as register zero.
3517 Any register, zero, or CONST_INT value is OK here. */
3518 if (GET_CODE (x) == REG)
3519 fputs (reg_names[REGNO (x)], file);
3520 else if (x == CONST0_RTX (GET_MODE (x)))
3522 else if (GET_CODE (x) == CONST_INT)
3523 output_addr_const (file, x);
3525 output_operand_lossage ("invalid %%r value");
3532 /* For conditional branches, returns or calls, substitute
3533 sptk, dptk, dpnt, or spnt for %s. */
3534 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3537 int pred_val = INTVAL (XEXP (x, 0));
3539 /* Guess top and bottom 10% statically predicted. */
3540 if (pred_val < REG_BR_PROB_BASE / 50)
3542 else if (pred_val < REG_BR_PROB_BASE / 2)
3544 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3549 else if (GET_CODE (current_output_insn) == CALL_INSN)
3554 fputs (which, file);
3559 x = current_insn_predicate;
3562 unsigned int regno = REGNO (XEXP (x, 0));
3563 if (GET_CODE (x) == EQ)
3565 fprintf (file, "(%s) ", reg_names [regno]);
3570 output_operand_lossage ("ia64_print_operand: unknown code");
3574 switch (GET_CODE (x))
3576 /* This happens for the spill/restore instructions. */
3581 /* ... fall through ... */
3584 fputs (reg_names [REGNO (x)], file);
3589 rtx addr = XEXP (x, 0);
3590 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3591 addr = XEXP (addr, 0);
3592 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3597 output_addr_const (file, x);
3604 /* Calculate the cost of moving data from a register in class FROM to
3605 one in class TO, using MODE. */
3608 ia64_register_move_cost (mode, from, to)
3609 enum machine_mode mode;
3610 enum reg_class from, to;
3612 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3613 if (to == ADDL_REGS)
3615 if (from == ADDL_REGS)
3618 /* All costs are symmetric, so reduce cases by putting the
3619 lower number class as the destination. */
3622 enum reg_class tmp = to;
3623 to = from, from = tmp;
3626 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3627 so that we get secondary memory reloads. Between FR_REGS,
3628 we have to make this at least as expensive as MEMORY_MOVE_COST
3629 to avoid spectacularly poor register class preferencing. */
3632 if (to != GR_REGS || from != GR_REGS)
3633 return MEMORY_MOVE_COST (mode, to, 0);
3641 /* Moving between PR registers takes two insns. */
3642 if (from == PR_REGS)
3644 /* Moving between PR and anything but GR is impossible. */
3645 if (from != GR_REGS)
3646 return MEMORY_MOVE_COST (mode, to, 0);
3650 /* Moving between BR and anything but GR is impossible. */
3651 if (from != GR_REGS && from != GR_AND_BR_REGS)
3652 return MEMORY_MOVE_COST (mode, to, 0);
3657 /* Moving between AR and anything but GR is impossible. */
3658 if (from != GR_REGS)
3659 return MEMORY_MOVE_COST (mode, to, 0);
3664 case GR_AND_FR_REGS:
3665 case GR_AND_BR_REGS:
3676 /* This function returns the register class required for a secondary
3677 register when copying between one of the registers in CLASS, and X,
3678 using MODE. A return value of NO_REGS means that no secondary register is required. */
3682 ia64_secondary_reload_class (class, mode, x)
3683 enum reg_class class;
3684 enum machine_mode mode ATTRIBUTE_UNUSED;
3689 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3690 regno = true_regnum (x);
3697 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3698 interaction. We end up with two pseudos with overlapping lifetimes
3699 both of which are equiv to the same constant, and both which need
3700 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3701 changes depending on the path length, which means the qty_first_reg
3702 check in make_regs_eqv can give different answers at different times.
3703 At some point I'll probably need a reload_indi pattern to handle this.
3706 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3707 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3708 non-general registers for good measure. */
3709 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3712 /* This is needed if a pseudo used as a call_operand gets spilled to a stack slot. */
3714 if (GET_CODE (x) == MEM)
3719 /* Need to go through general registers to get to other class regs. */
3720 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3723 /* This can happen when a paradoxical subreg is an operand to the
3725 /* ??? This shouldn't be necessary after instruction scheduling is
3726 enabled, because paradoxical subregs are not accepted by
3727 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3728 stop the paradoxical subreg stupidity in the *_operand functions
3730 if (GET_CODE (x) == MEM
3731 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3732 || GET_MODE (x) == QImode))
3735 /* This can happen because of the ior/and/etc patterns that accept FP
3736 registers as operands. If the third operand is a constant, then it
3737 needs to be reloaded into a FP register. */
3738 if (GET_CODE (x) == CONST_INT)
3741 /* This can happen because of register elimination in a muldi3 insn.
3742 E.g. `26107 * (unsigned long)&u'. */
3743 if (GET_CODE (x) == PLUS)
3748 /* ??? This happens if we cse/gcse a BImode value across a call,
3749 and the function has a nonlocal goto. This is because global
3750 does not allocate call crossing pseudos to hard registers when
3751 current_function_has_nonlocal_goto is true. This is relatively
3752 common for C++ programs that use exceptions. To reproduce,
3753 return NO_REGS and compile libstdc++. */
3754 if (GET_CODE (x) == MEM)
3757 /* This can happen when we take a BImode subreg of a DImode value,
3758 and that DImode value winds up in some non-GR register. */
3759 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3764 /* Since we have no offsettable memory addresses, we need a temporary
3765 to hold the address of the second word. */
3778 /* Emit text to declare externally defined variables and functions, because
3779 the Intel assembler does not support undefined externals. */
3782 ia64_asm_output_external (file, decl, name)
3787 int save_referenced;
3789 /* GNU as does not need anything here. */
3793 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3794 the linker when we do this, so we need to be careful not to do this for
3795 builtin functions which have no library equivalent. Unfortunately, we
3796 can't tell here whether or not a function will actually be called by
3797 expand_expr, so we pull in library functions even if we may not need them. */
3799 if (! strcmp (name, "__builtin_next_arg")
3800 || ! strcmp (name, "alloca")
3801 || ! strcmp (name, "__builtin_constant_p")
3802 || ! strcmp (name, "__builtin_args_info"))
3805 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */
3807 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3808 if (TREE_CODE (decl) == FUNCTION_DECL)
3810 fprintf (file, "%s", TYPE_ASM_OP);
3811 assemble_name (file, name);
3813 fprintf (file, TYPE_OPERAND_FMT, "function");
3816 ASM_GLOBALIZE_LABEL (file, name);
3817 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
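/* Illustrative only: on the Intel-assembler path (GNU as returns early
   above), an external function foo is declared roughly as

	.type	foo,@function
	.global	foo

   though the exact directives come from TYPE_ASM_OP, TYPE_OPERAND_FMT,
   and ASM_GLOBALIZE_LABEL.  */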
3820 /* Parse the -mfixed-range= option string. */
3823 fix_range (const_str)
3824 const char *const_str;
3827 char *str, *dash, *comma;
3829 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3830 REG2 are either register names or register numbers. The effect
3831 of this option is to mark the registers in the range from REG1 to
3832 REG2 as ``fixed'' so they won't be used by the compiler. This is
3833 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3835 i = strlen (const_str);
3836 str = (char *) alloca (i + 1);
3837 memcpy (str, const_str, i + 1);
3841 dash = strchr (str, '-');
3844 warning ("value of -mfixed-range must have form REG1-REG2");
3849 comma = strchr (dash + 1, ',');
3853 first = decode_reg_name (str);
3856 warning ("unknown register name: %s", str);
3860 last = decode_reg_name (dash + 1);
3863 warning ("unknown register name: %s", dash + 1);
3871 warning ("%s-%s is an empty range", str, dash + 1);
3875 for (i = first; i <= last; ++i)
3876 fixed_regs[i] = call_used_regs[i] = 1;
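/* Example use (from the comment above):

	-mfixed-range=f32-f127

   marks f32 through f127 as fixed and call-used, keeping generated code
   out of the high FP partition.  */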
3886 /* Called to register all of our global variables with the garbage collector. */
3890 ia64_add_gc_roots ()
3892 ggc_add_rtx_root (&ia64_compare_op0, 1);
3893 ggc_add_rtx_root (&ia64_compare_op1, 1);
3897 ia64_init_machine_status (p)
3901 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3905 ia64_mark_machine_status (p)
3908 struct machine_function *machine = p->machine;
3912 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3913 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3914 ggc_mark_rtx (machine->ia64_gp_save);
3919 ia64_free_machine_status (p)
3926 /* Handle TARGET_OPTIONS switches. */
3929 ia64_override_options ()
3931 if (TARGET_AUTO_PIC)
3932 target_flags |= MASK_CONST_GP;
3934 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3936 warning ("cannot optimize division for both latency and throughput");
3937 target_flags &= ~MASK_INLINE_DIV_THR;
3940 if (ia64_fixed_range_string)
3941 fix_range (ia64_fixed_range_string);
3943 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3944 flag_schedule_insns_after_reload = 0;
3946 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3948 init_machine_status = ia64_init_machine_status;
3949 mark_machine_status = ia64_mark_machine_status;
3950 free_machine_status = ia64_free_machine_status;
3952 ia64_add_gc_roots ();
3955 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3956 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3957 static enum attr_type ia64_safe_type PARAMS((rtx));
3959 static enum attr_itanium_requires_unit0
3960 ia64_safe_itanium_requires_unit0 (insn)
3963 if (recog_memoized (insn) >= 0)
3964 return get_attr_itanium_requires_unit0 (insn);
3966 return ITANIUM_REQUIRES_UNIT0_NO;
3969 static enum attr_itanium_class
3970 ia64_safe_itanium_class (insn)
3973 if (recog_memoized (insn) >= 0)
3974 return get_attr_itanium_class (insn);
3976 return ITANIUM_CLASS_UNKNOWN;
3979 static enum attr_type
3980 ia64_safe_type (insn)
3983 if (recog_memoized (insn) >= 0)
3984 return get_attr_type (insn);
3986 return TYPE_UNKNOWN;
3989 /* The following collection of routines emits instruction group stop bits as
3990 necessary to avoid dependencies. */
3992 /* Need to track some additional registers as far as serialization is
3993 concerned so we can properly handle br.call and br.ret. We could
3994 make these registers visible to gcc, but since these registers are
3995 never explicitly used in gcc generated code, it seems wasteful to
3996 do so (plus it would make the call and return patterns needlessly complex). */
3998 #define REG_GP (GR_REG (1))
3999 #define REG_RP (BR_REG (0))
4000 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4001 /* This is used for volatile asms which may require a stop bit immediately
4002 before and after them. */
4003 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4004 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4005 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4007 /* For each register, we keep track of how it has been written in the
4008 current instruction group.
4010 If a register is written unconditionally (no qualifying predicate),
4011 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4013 If a register is written if its qualifying predicate P is true, we
4014 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4015 may be written again by the complement of P (P^1) and when this happens,
4016 WRITE_COUNT gets set to 2.
4018 The result of this is that whenever an insn attempts to write a register
4019 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4021 If a predicate register is written by a floating-point insn, we set
4022 WRITTEN_BY_FP to true.
4024 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4025 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
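/* Illustrative only: within one instruction group,

	(p6) mov r8 = 1		// write_count = 1, first_pred = p6
	(p7) mov r8 = 2		// complement of p6: count becomes 2, no barrier
	(p6) mov r8 = 3		// count is already 2 => stop bit required

   assuming p6/p7 form a complementary predicate pair.  */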
4027 struct reg_write_state
4029 unsigned int write_count : 2;
4030 unsigned int first_pred : 16;
4031 unsigned int written_by_fp : 1;
4032 unsigned int written_by_and : 1;
4033 unsigned int written_by_or : 1;
4036 /* Cumulative info for the current instruction group. */
4037 struct reg_write_state rws_sum[NUM_REGS];
4038 /* Info for the current instruction. This gets copied to rws_sum after a
4039 stop bit is emitted. */
4040 struct reg_write_state rws_insn[NUM_REGS];
4042 /* Indicates whether this is the first instruction after a stop bit,
4043 in which case we don't need another stop bit. Without this, we hit
4044 the abort in ia64_variable_issue when scheduling an alloc. */
4045 static int first_instruction;
4047 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4048 RTL for one instruction. */
4051 unsigned int is_write : 1; /* Is register being written? */
4052 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4053 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4054 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4055 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4056 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4059 static void rws_update PARAMS ((struct reg_write_state *, int,
4060 struct reg_flags, int));
4061 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4062 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4063 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4064 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4065 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4066 static void init_insn_group_barriers PARAMS ((void));
4067 static int group_barrier_needed_p PARAMS ((rtx));
4068 static int safe_group_barrier_needed_p PARAMS ((rtx));
4070 /* Update *RWS for REGNO, which is being written by the current instruction,
4071 with predicate PRED, and associated register flags in FLAGS. */
4074 rws_update (rws, regno, flags, pred)
4075 struct reg_write_state *rws;
4077 struct reg_flags flags;
4081 rws[regno].write_count++;
4083 rws[regno].write_count = 2;
4084 rws[regno].written_by_fp |= flags.is_fp;
4085 /* ??? Not tracking and/or across differing predicates. */
4086 rws[regno].written_by_and = flags.is_and;
4087 rws[regno].written_by_or = flags.is_or;
4088 rws[regno].first_pred = pred;
4091 /* Handle an access to register REGNO of type FLAGS using predicate register
4092 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4093 a dependency with an earlier instruction in the same group. */
4096 rws_access_regno (regno, flags, pred)
4098 struct reg_flags flags;
4101 int need_barrier = 0;
4103 if (regno >= NUM_REGS)
4106 if (! PR_REGNO_P (regno))
4107 flags.is_and = flags.is_or = 0;
4113 /* One insn writes same reg multiple times? */
4114 if (rws_insn[regno].write_count > 0)
4117 /* Update info for current instruction. */
4118 rws_update (rws_insn, regno, flags, pred);
4119 write_count = rws_sum[regno].write_count;
4121 switch (write_count)
4124 /* The register has not been written yet. */
4125 rws_update (rws_sum, regno, flags, pred);
4129 /* The register has been written via a predicate. If this is
4130 not a complementary predicate, then we need a barrier. */
4131 /* ??? This assumes that P and P+1 are always complementary
4132 predicates for P even. */
4133 if (flags.is_and && rws_sum[regno].written_by_and)
4135 else if (flags.is_or && rws_sum[regno].written_by_or)
4137 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4139 rws_update (rws_sum, regno, flags, pred);
4143 /* The register has been unconditionally written already. We need a barrier. */
4145 if (flags.is_and && rws_sum[regno].written_by_and)
4147 else if (flags.is_or && rws_sum[regno].written_by_or)
4151 rws_sum[regno].written_by_and = flags.is_and;
4152 rws_sum[regno].written_by_or = flags.is_or;
4161 if (flags.is_branch)
4163 /* Branches have several RAW exceptions that allow us to avoid barriers. */
4166 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4167 /* RAW dependencies on branch regs are permissible as long
4168 as the writer is a non-branch instruction. Since we
4169 never generate code that uses a branch register written
4170 by a branch instruction, handling this case is
4174 if (REGNO_REG_CLASS (regno) == PR_REGS
4175 && ! rws_sum[regno].written_by_fp)
4176 /* The predicates of a branch are available within the
4177 same insn group as long as the predicate was written by
4178 something other than a floating-point instruction. */
4182 if (flags.is_and && rws_sum[regno].written_by_and)
4184 if (flags.is_or && rws_sum[regno].written_by_or)
4187 switch (rws_sum[regno].write_count)
4190 /* The register has not been written yet. */
4194 /* The register has been written via a predicate. If this is
4195 not a complementary predicate, then we need a barrier. */
4196 /* ??? This assumes that P and P+1 are always complementary
4197 predicates for P even. */
4198 if ((rws_sum[regno].first_pred ^ 1) != pred)
4203 /* The register has been unconditionally written already. We need a barrier. */
4213 return need_barrier;
4217 rws_access_reg (reg, flags, pred)
4219 struct reg_flags flags;
4222 int regno = REGNO (reg);
4223 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4226 return rws_access_regno (regno, flags, pred);
4229 int need_barrier = 0;
4231 need_barrier |= rws_access_regno (regno + n, flags, pred);
4232 return need_barrier;
4236 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4237 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4240 update_set_flags (x, pflags, ppred, pcond)
4242 struct reg_flags *pflags;
4246 rtx src = SET_SRC (x);
4250 switch (GET_CODE (src))
4256 if (SET_DEST (x) == pc_rtx)
4257 /* X is a conditional branch. */
4261 int is_complemented = 0;
4263 /* X is a conditional move. */
4264 rtx cond = XEXP (src, 0);
4265 if (GET_CODE (cond) == EQ)
4266 is_complemented = 1;
4267 cond = XEXP (cond, 0);
4268 if (GET_CODE (cond) != REG
4269 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4272 if (XEXP (src, 1) == SET_DEST (x)
4273 || XEXP (src, 2) == SET_DEST (x))
4275 /* X is a conditional move that conditionally writes the destination. */
4278 /* We need another complement in this case. */
4279 if (XEXP (src, 1) == SET_DEST (x))
4280 is_complemented = ! is_complemented;
4282 *ppred = REGNO (cond);
4283 if (is_complemented)
4287 /* ??? If this is a conditional write to the dest, then this
4288 instruction does not actually read one source. This probably
4289 doesn't matter, because that source is also the dest. */
4290 /* ??? Multiple writes to predicate registers are allowed
4291 if they are all AND type compares, or if they are all OR
4292 type compares. We do not generate such instructions currently. */
4295 /* ... fall through ... */
4298 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4299 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4300 /* Set pflags->is_fp to 1 so that we know we're dealing
4301 with a floating point comparison when processing the
4302 destination of the SET. */
4305 /* Discover if this is a parallel comparison. We only handle
4306 and.orcm and or.andcm at present, since we must retain a
4307 strict inverse on the predicate pair. */
4308 else if (GET_CODE (src) == AND)
4310 else if (GET_CODE (src) == IOR)
4317 /* Subroutine of rtx_needs_barrier; this function determines whether the
4318 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4319 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4323 set_src_needs_barrier (x, flags, pred, cond)
4325 struct reg_flags flags;
4329 int need_barrier = 0;
4331 rtx src = SET_SRC (x);
4333 if (GET_CODE (src) == CALL)
4334 /* We don't need to worry about the result registers that
4335 get written by subroutine call. */
4336 return rtx_needs_barrier (src, flags, pred);
4337 else if (SET_DEST (x) == pc_rtx)
4339 /* X is a conditional branch. */
4340 /* ??? This seems redundant, as the caller sets this bit for all JUMP_INSNs. */
4342 flags.is_branch = 1;
4343 return rtx_needs_barrier (src, flags, pred);
4346 need_barrier = rtx_needs_barrier (src, flags, pred);
4348 /* This instruction unconditionally uses a predicate register. */
4350 need_barrier |= rws_access_reg (cond, flags, 0);
4353 if (GET_CODE (dst) == ZERO_EXTRACT)
4355 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4356 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4357 dst = XEXP (dst, 0);
4359 return need_barrier;
4362 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4363 Return 1 if this access creates a dependency with an earlier instruction
4364 in the same group. */
4367 rtx_needs_barrier (x, flags, pred)
4369 struct reg_flags flags;
4373 int is_complemented = 0;
4374 int need_barrier = 0;
4375 const char *format_ptr;
4376 struct reg_flags new_flags;
4384 switch (GET_CODE (x))
4387 update_set_flags (x, &new_flags, &pred, &cond);
4388 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4389 if (GET_CODE (SET_SRC (x)) != CALL)
4391 new_flags.is_write = 1;
4392 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4397 new_flags.is_write = 0;
4398 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4400 /* Avoid multiple register writes, in case this is a pattern with
4401 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4402 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4404 new_flags.is_write = 1;
4405 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4406 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4407 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4412 /* X is a predicated instruction. */
4414 cond = COND_EXEC_TEST (x);
4417 need_barrier = rtx_needs_barrier (cond, flags, 0);
4419 if (GET_CODE (cond) == EQ)
4420 is_complemented = 1;
4421 cond = XEXP (cond, 0);
4422 if (GET_CODE (cond) != REG
4423 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4425 pred = REGNO (cond);
4426 if (is_complemented)
4429 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4430 return need_barrier;
4434 /* Clobber & use are for earlier compiler-phases only. */
4439 /* We always emit stop bits for traditional asms. We emit stop bits
4440 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4441 if (GET_CODE (x) != ASM_OPERANDS
4442 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4444 /* Avoid writing the register multiple times if we have multiple
4445 asm outputs. This avoids an abort in rws_access_reg. */
4446 if (! rws_insn[REG_VOLATILE].write_count)
4448 new_flags.is_write = 1;
4449 rws_access_regno (REG_VOLATILE, new_flags, pred);
4454 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4455 We can not just fall through here since then we would be confused
4456 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4457 traditional asms unlike their normal usage. */
4459 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4460 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4465 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4467 rtx pat = XVECEXP (x, 0, i);
4468 if (GET_CODE (pat) == SET)
4470 update_set_flags (pat, &new_flags, &pred, &cond);
4471 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4473 else if (GET_CODE (pat) == USE
4474 || GET_CODE (pat) == CALL
4475 || GET_CODE (pat) == ASM_OPERANDS)
4476 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4477 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4480 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4482 rtx pat = XVECEXP (x, 0, i);
4483 if (GET_CODE (pat) == SET)
4485 if (GET_CODE (SET_SRC (pat)) != CALL)
4487 new_flags.is_write = 1;
4488 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4492 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4493 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4501 if (REGNO (x) == AR_UNAT_REGNUM)
4503 for (i = 0; i < 64; ++i)
4504 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4507 need_barrier = rws_access_reg (x, flags, pred);
4511 /* Find the regs used in memory address computation. */
4512 new_flags.is_write = 0;
4513 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4516 case CONST_INT: case CONST_DOUBLE:
4517 case SYMBOL_REF: case LABEL_REF: case CONST:
4520 /* Operators with side-effects. */
4521 case POST_INC: case POST_DEC:
4522 if (GET_CODE (XEXP (x, 0)) != REG)
4525 new_flags.is_write = 0;
4526 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4527 new_flags.is_write = 1;
4528 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4532 if (GET_CODE (XEXP (x, 0)) != REG)
4535 new_flags.is_write = 0;
4536 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4537 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4538 new_flags.is_write = 1;
4539 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4542 /* Handle common unary and binary ops for efficiency. */
4543 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4544 case MOD: case UDIV: case UMOD: case AND: case IOR:
4545 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4546 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4547 case NE: case EQ: case GE: case GT: case LE:
4548 case LT: case GEU: case GTU: case LEU: case LTU:
4549 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4550 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4553 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4554 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4555 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4556 case SQRT: case FFS:
4557 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4561 switch (XINT (x, 1))
4563 case 1: /* st8.spill */
4564 case 2: /* ld8.fill */
4566 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4567 HOST_WIDE_INT bit = (offset >> 3) & 63;
4569 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4570 new_flags.is_write = (XINT (x, 1) == 1);
4571 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4576 case 3: /* stf.spill */
4577 case 4: /* ldf.spill */
4578 case 8: /* popcnt */
4579 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4582 case 7: /* pred_rel_mutex */
4583 case 9: /* pic call */
4585 case 19: /* fetchadd_acq */
4586 case 20: /* mov = ar.bsp */
4587 case 21: /* flushrs */
4588 case 22: /* bundle selector */
4589 case 23: /* cycle display */
4592 case 24: /* addp4 */
4593 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4596 case 5: /* recip_approx */
4597 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4598 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4601 case 13: /* cmpxchg_acq */
4602 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4603 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4611 case UNSPEC_VOLATILE:
4612 switch (XINT (x, 1))
4615 /* Alloc must always be the first instruction of a group.
4616 We force this by always returning true. */
4617 /* ??? We might get better scheduling if we explicitly check for
4618 input/local/output register dependencies, and modify the
4619 scheduler so that alloc is always reordered to the start of
4620 the current group. We could then eliminate all of the
4621 first_instruction code. */
4622 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4624 new_flags.is_write = 1;
4625 rws_access_regno (REG_AR_CFM, new_flags, pred);
4628 case 1: /* blockage */
4629 case 2: /* insn group barrier */
4632 case 5: /* set_bsp */
4636 case 7: /* pred.rel.mutex */
4637 case 8: /* safe_across_calls all */
4638 case 9: /* safe_across_calls normal */
4647 new_flags.is_write = 0;
4648 need_barrier = rws_access_regno (REG_RP, flags, pred);
4649 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4651 new_flags.is_write = 1;
4652 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4653 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4657 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4658 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4659 switch (format_ptr[i])
4661 case '0': /* unused field */
4662 case 'i': /* integer */
4663 case 'n': /* note */
4664 case 'w': /* wide integer */
4665 case 's': /* pointer to string */
4666 case 'S': /* optional pointer to string */
4670 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4675 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4676 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4685 return need_barrier;
4688 /* Clear out the state for group_barrier_needed_p at the start of a
4689 sequence of insns. */
4692 init_insn_group_barriers ()
4694 memset (rws_sum, 0, sizeof (rws_sum));
4695 first_instruction = 1;
4698 /* Given the current state, recorded by previous calls to this function,
4699 determine whether a group barrier (a stop bit) is necessary before INSN.
4700 Return nonzero if so. */
4703 group_barrier_needed_p (insn)
4707 int need_barrier = 0;
4708 struct reg_flags flags;
4710 memset (&flags, 0, sizeof (flags));
4711 switch (GET_CODE (insn))
4717 /* A barrier doesn't imply an instruction group boundary. */
4721 memset (rws_insn, 0, sizeof (rws_insn));
4725 flags.is_branch = 1;
4726 flags.is_sibcall = SIBLING_CALL_P (insn);
4727 memset (rws_insn, 0, sizeof (rws_insn));
4729 /* Don't bundle a call following another call. */
4730 if ((pat = prev_active_insn (insn))
4731 && GET_CODE (pat) == CALL_INSN)
4737 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4741 flags.is_branch = 1;
4743 /* Don't bundle a jump following a call. */
4744 if ((pat = prev_active_insn (insn))
4745 && GET_CODE (pat) == CALL_INSN)
4753 if (GET_CODE (PATTERN (insn)) == USE
4754 || GET_CODE (PATTERN (insn)) == CLOBBER)
4755 /* Don't care about USE and CLOBBER "insns"---those are used to
4756 indicate to the optimizer that it shouldn't get rid of
4757 certain operations. */
4760 pat = PATTERN (insn);
4762 /* Ug. Hack hacks hacked elsewhere. */
4763 switch (recog_memoized (insn))
4765 /* We play dependency tricks with the epilogue in order
4766 to get proper schedules. Undo this for dv analysis. */
4767 case CODE_FOR_epilogue_deallocate_stack:
4768 pat = XVECEXP (pat, 0, 0);
4771 /* The pattern we use for br.cloop confuses the code above.
4772 The second element of the vector is representative. */
4773 case CODE_FOR_doloop_end_internal:
4774 pat = XVECEXP (pat, 0, 1);
4777 /* Doesn't generate code. */
4778 case CODE_FOR_pred_rel_mutex:
4779 case CODE_FOR_prologue_use:
4786 memset (rws_insn, 0, sizeof (rws_insn));
4787 need_barrier = rtx_needs_barrier (pat, flags, 0);
4789 /* Check to see if the previous instruction was a volatile
4792 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4799 if (first_instruction)
4802 first_instruction = 0;
4805 return need_barrier;
4808 /* Like group_barrier_needed_p, but do not clobber the current state. */
4811 safe_group_barrier_needed_p (insn)
4814 struct reg_write_state rws_saved[NUM_REGS];
4815 int saved_first_instruction;
4818 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4819 saved_first_instruction = first_instruction;
4821 t = group_barrier_needed_p (insn);
4823 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4824 first_instruction = saved_first_instruction;
4829 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4830 as necessary to eliminate dependendencies. This function assumes that
4831 a final instruction scheduling pass has been run which has already
4832 inserted most of the necessary stop bits. This function only inserts
4833 new ones at basic block boundaries, since these are invisible to the
4837 emit_insn_group_barriers (dump, insns)
4843 int insns_since_last_label = 0;
4845 init_insn_group_barriers ();
4847 for (insn = insns; insn; insn = NEXT_INSN (insn))
4849 if (GET_CODE (insn) == CODE_LABEL)
4851 if (insns_since_last_label)
4853 insns_since_last_label = 0;
4855 else if (GET_CODE (insn) == NOTE
4856 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4858 if (insns_since_last_label)
4860 insns_since_last_label = 0;
4862 else if (GET_CODE (insn) == INSN
4863 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4864 && XINT (PATTERN (insn), 1) == 2)
4866 init_insn_group_barriers ();
4869 else if (INSN_P (insn))
4871 insns_since_last_label = 1;
4873 if (group_barrier_needed_p (insn))
4878 fprintf (dump, "Emitting stop before label %d\n",
4879 INSN_UID (last_label));
4880 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4883 init_insn_group_barriers ();
4891 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4892 This function has to emit all necessary group barriers. */
4895 emit_all_insn_group_barriers (dump, insns)
4896 FILE *dump ATTRIBUTE_UNUSED;
4901 init_insn_group_barriers ();
4903 for (insn = insns; insn; insn = NEXT_INSN (insn))
4905 if (GET_CODE (insn) == BARRIER)
4907 rtx last = prev_active_insn (insn);
4911 if (GET_CODE (last) == JUMP_INSN
4912 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4913 last = prev_active_insn (last);
4914 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4915 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4917 init_insn_group_barriers ();
4919 else if (INSN_P (insn))
4921 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4922 init_insn_group_barriers ();
4923 else if (group_barrier_needed_p (insn))
4925 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4926 init_insn_group_barriers ();
4927 group_barrier_needed_p (insn);
4933 static int errata_find_address_regs PARAMS ((rtx *, void *));
4934 static void errata_emit_nops PARAMS ((rtx));
4935 static void fixup_errata PARAMS ((void));
4937 /* This structure is used to track some details about the previous insns
4938 groups so we can determine if it may be necessary to insert NOPs to
4939 workaround hardware errata. */
4942 HARD_REG_SET p_reg_set;
4943 HARD_REG_SET gr_reg_conditionally_set;
4946 /* Index into the last_group array. */
4947 static int group_idx;
4949 /* Called through for_each_rtx; determines if a hard register that was
4950 conditionally set in the previous group is used as an address register.
4951 It ensures that for_each_rtx returns 1 in that case. */
4953 errata_find_address_regs (xp, data)
4955 void *data ATTRIBUTE_UNUSED;
4958 if (GET_CODE (x) != MEM)
4961 if (GET_CODE (x) == POST_MODIFY)
4963 if (GET_CODE (x) == REG)
4965 struct group *prev_group = last_group + (group_idx ^ 1);
4966 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4974 /* Called for each insn; this function keeps track of the state in
4975 last_group and emits additional NOPs if necessary to work around
4976 an Itanium A/B step erratum. */
4978 errata_emit_nops (insn)
4981 struct group *this_group = last_group + group_idx;
4982 struct group *prev_group = last_group + (group_idx ^ 1);
4983 rtx pat = PATTERN (insn);
4984 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4985 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4986 enum attr_type type;
4989 if (GET_CODE (real_pat) == USE
4990 || GET_CODE (real_pat) == CLOBBER
4991 || GET_CODE (real_pat) == ASM_INPUT
4992 || GET_CODE (real_pat) == ADDR_VEC
4993 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4994 || asm_noperands (PATTERN (insn)) >= 0)
4997 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5000 if (GET_CODE (set) == PARALLEL)
5003 set = XVECEXP (real_pat, 0, 0);
5004 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5005 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5006 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5013 if (set && GET_CODE (set) != SET)
5016 type = get_attr_type (insn);
5019 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5020 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5022 if ((type == TYPE_M || type == TYPE_A) && cond && set
5023 && REG_P (SET_DEST (set))
5024 && GET_CODE (SET_SRC (set)) != PLUS
5025 && GET_CODE (SET_SRC (set)) != MINUS
5026 && (GET_CODE (SET_SRC (set)) != ASHIFT
5027 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5028 && (GET_CODE (SET_SRC (set)) != MEM
5029 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5030 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5032 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5033 || ! REG_P (XEXP (cond, 0)))
5036 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5037 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5039 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5041 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5042 emit_insn_before (gen_nop (), insn);
5043 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5045 memset (last_group, 0, sizeof last_group);
5049 /* Emit extra nops if they are required to work around hardware errata. */
5056 if (! TARGET_B_STEP)
5060 memset (last_group, 0, sizeof last_group);
5062 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5067 if (ia64_safe_type (insn) == TYPE_S)
5070 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5073 errata_emit_nops (insn);
5077 /* Instruction scheduling support. */
5078 /* Describe one bundle. */
5082 /* Zero if there's no possibility of a stop in this bundle other than
5083 at the end, otherwise the position of the optional stop bit. */
5085 /* The types of the three slots. */
5086 enum attr_type t[3];
5087 /* The pseudo op to be emitted into the assembler output. */
5091 #define NR_BUNDLES 10
5093 /* A list of all available bundles. */
5095 static const struct bundle bundle[NR_BUNDLES] =
5097 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5098 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5099 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5100 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5101 #if NR_BUNDLES == 10
5102 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5103 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5105 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5106 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5107 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5108 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5109 it matches an L type insn. Otherwise we'll try to generate L type
5111 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5114 /* Describe a packet of instructions. Packets consist of two bundles that
5115 are visible to the hardware in one scheduling window. */
5119 const struct bundle *t1, *t2;
5120 /* Precomputed value of the first split issue in this packet if a cycle
5121 starts at its beginning. */
5123 /* For convenience, the insn types are replicated here so we don't have
5124 to go through T1 and T2 all the time. */
5125 enum attr_type t[6];
5128 /* An array containing all possible packets. */
5129 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5130 static struct ia64_packet packets[NR_PACKETS];
5132 /* Map attr_type to a string with the name. */
5134 static const char *const type_names[] =
5136 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5139 /* Nonzero if we should insert stop bits into the schedule. */
5140 int ia64_final_schedule = 0;
5142 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5143 static rtx ia64_single_set PARAMS ((rtx));
5144 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5145 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5146 static void maybe_rotate PARAMS ((FILE *));
5147 static void finish_last_head PARAMS ((FILE *, int));
5148 static void rotate_one_bundle PARAMS ((FILE *));
5149 static void rotate_two_bundles PARAMS ((FILE *));
5150 static void nop_cycles_until PARAMS ((int, FILE *));
5151 static void cycle_end_fill_slots PARAMS ((FILE *));
5152 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5153 static int get_split PARAMS ((const struct ia64_packet *, int));
5154 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5155 const struct ia64_packet *, int));
5156 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5157 rtx *, enum attr_type *, int));
5158 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5159 static void dump_current_packet PARAMS ((FILE *));
5160 static void schedule_stop PARAMS ((FILE *));
5161 static rtx gen_nop_type PARAMS ((enum attr_type));
5162 static void ia64_emit_nops PARAMS ((void));
5164 /* Map a bundle number to its pseudo-op. */
5170 return bundle[b].name;
5173 /* Compute the slot which will cause a split issue in packet P if the
5174 current cycle begins at slot BEGIN. */
5177 itanium_split_issue (p, begin)
5178 const struct ia64_packet *p;
5181 int type_count[TYPE_S];
5187 /* Always split before and after MMF. */
5188 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5190 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5192 /* Always split after MBB and BBB. */
5193 if (p->t[1] == TYPE_B)
5195 /* Split after first bundle in MIB BBB combination. */
5196 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5200 memset (type_count, 0, sizeof type_count);
5201 for (i = begin; i < split; i++)
5203 enum attr_type t0 = p->t[i];
5204 /* An MLX bundle reserves the same units as an MFI bundle. */
5205 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5206 : t0 == TYPE_X ? TYPE_I
5209 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5210 2 integer per cycle. */
5211 int max = (t == TYPE_B ? 3 : 2);
5212 if (type_count[t] == max)
5220 /* Return the maximum number of instructions a cpu can issue. */
5228 /* Helper function - like single_set, but look inside COND_EXEC. */
5231 ia64_single_set (insn)
5234 rtx x = PATTERN (insn), ret;
5235 if (GET_CODE (x) == COND_EXEC)
5236 x = COND_EXEC_CODE (x);
5237 if (GET_CODE (x) == SET)
5239 ret = single_set_2 (insn, x);
5240 if (ret == NULL && GET_CODE (x) == PARALLEL)
5242 /* Special case here prologue_allocate_stack and
5243 epilogue_deallocate_stack. Although it is not a classical
5244 single set, the second set is there just to protect it
5245 from moving past FP-relative stack accesses. */
5246 if (XVECLEN (x, 0) == 2
5247 && GET_CODE (XVECEXP (x, 0, 0)) == SET
5248 && GET_CODE (XVECEXP (x, 0, 1)) == SET
5249 && GET_CODE (SET_DEST (XVECEXP (x, 0, 1))) == REG
5250 && SET_DEST (XVECEXP (x, 0, 1)) == SET_SRC (XVECEXP (x, 0, 1))
5251 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5252 ret = XVECEXP (x, 0, 0);
5257 /* Adjust the cost of a scheduling dependency. Return the new cost of
5258 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5261 ia64_adjust_cost (insn, link, dep_insn, cost)
5262 rtx insn, link, dep_insn;
5265 enum attr_type dep_type;
5266 enum attr_itanium_class dep_class;
5267 enum attr_itanium_class insn_class;
5268 rtx dep_set, set, src, addr;
5270 if (GET_CODE (PATTERN (insn)) == CLOBBER
5271 || GET_CODE (PATTERN (insn)) == USE
5272 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5273 || GET_CODE (PATTERN (dep_insn)) == USE
5274 /* @@@ Not accurate for indirect calls. */
5275 || GET_CODE (insn) == CALL_INSN
5276 || ia64_safe_type (insn) == TYPE_S)
5279 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5280 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5283 dep_type = ia64_safe_type (dep_insn);
5284 dep_class = ia64_safe_itanium_class (dep_insn);
5285 insn_class = ia64_safe_itanium_class (insn);
5287 /* Compares that feed a conditional branch can execute in the same
5289 dep_set = ia64_single_set (dep_insn);
5290 set = ia64_single_set (insn);
5292 if (dep_type != TYPE_F
5294 && GET_CODE (SET_DEST (dep_set)) == REG
5295 && PR_REG (REGNO (SET_DEST (dep_set)))
5296 && GET_CODE (insn) == JUMP_INSN)
5299 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5301 /* ??? Can't find any information in the documenation about whether
5305 splits issue. Assume it doesn't. */
5309 src = set ? SET_SRC (set) : 0;
5313 if (GET_CODE (SET_DEST (set)) == MEM)
5314 addr = XEXP (SET_DEST (set), 0);
5315 else if (GET_CODE (SET_DEST (set)) == SUBREG
5316 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5317 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5321 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5322 addr = XVECEXP (addr, 0, 0);
5323 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5324 addr = XEXP (addr, 0);
5325 if (GET_CODE (addr) == MEM)
5326 addr = XEXP (addr, 0);
5332 if (addr && GET_CODE (addr) == POST_MODIFY)
5333 addr = XEXP (addr, 0);
5335 set = ia64_single_set (dep_insn);
5337 if ((dep_class == ITANIUM_CLASS_IALU
5338 || dep_class == ITANIUM_CLASS_ILOG
5339 || dep_class == ITANIUM_CLASS_LD)
5340 && (insn_class == ITANIUM_CLASS_LD
5341 || insn_class == ITANIUM_CLASS_ST))
5343 if (! addr || ! set)
5345 /* This isn't completely correct - an IALU that feeds an address has
5346 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5347 otherwise. Unfortunately there's no good way to describe this. */
5348 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5351 if ((dep_class == ITANIUM_CLASS_IALU
5352 || dep_class == ITANIUM_CLASS_ILOG
5353 || dep_class == ITANIUM_CLASS_LD)
5354 && (insn_class == ITANIUM_CLASS_MMMUL
5355 || insn_class == ITANIUM_CLASS_MMSHF
5356 || insn_class == ITANIUM_CLASS_MMSHFI))
5358 if (dep_class == ITANIUM_CLASS_FMAC
5359 && (insn_class == ITANIUM_CLASS_FMISC
5360 || insn_class == ITANIUM_CLASS_FCVTFX
5361 || insn_class == ITANIUM_CLASS_XMPY))
5363 if ((dep_class == ITANIUM_CLASS_FMAC
5364 || dep_class == ITANIUM_CLASS_FMISC
5365 || dep_class == ITANIUM_CLASS_FCVTFX
5366 || dep_class == ITANIUM_CLASS_XMPY)
5367 && insn_class == ITANIUM_CLASS_STF)
5369 if ((dep_class == ITANIUM_CLASS_MMMUL
5370 || dep_class == ITANIUM_CLASS_MMSHF
5371 || dep_class == ITANIUM_CLASS_MMSHFI)
5372 && (insn_class == ITANIUM_CLASS_LD
5373 || insn_class == ITANIUM_CLASS_ST
5374 || insn_class == ITANIUM_CLASS_IALU
5375 || insn_class == ITANIUM_CLASS_ILOG
5376 || insn_class == ITANIUM_CLASS_ISHF))
5382 /* Describe the current state of the Itanium pipeline. */
5385 /* The first slot that is used in the current cycle. */
5387 /* The next slot to fill. */
5389 /* The packet we have selected for the current issue window. */
5390 const struct ia64_packet *packet;
5391 /* The position of the split issue that occurs due to issue width
5392 limitations (6 if there's no split issue). */
5394 /* Record data about the insns scheduled so far in the same issue
5395 window. The elements up to but not including FIRST_SLOT belong
5396 to the previous cycle, the ones starting with FIRST_SLOT belong
5397 to the current cycle. */
5398 enum attr_type types[6];
5401 /* Nonzero if we decided to schedule a stop bit. */
5405 /* Temporary arrays; they have enough elements to hold all insns that
5406 can be ready at the same time while scheduling of the current block.
5407 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5408 static rtx *sched_ready;
5409 static enum attr_type *sched_types;
5411 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5415 insn_matches_slot (p, itype, slot, insn)
5416 const struct ia64_packet *p;
5417 enum attr_type itype;
5421 enum attr_itanium_requires_unit0 u0;
5422 enum attr_type stype = p->t[slot];
5426 u0 = ia64_safe_itanium_requires_unit0 (insn);
5427 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5430 for (i = sched_data.first_slot; i < slot; i++)
5431 if (p->t[i] == stype
5432 || (stype == TYPE_F && p->t[i] == TYPE_L)
5433 || (stype == TYPE_I && p->t[i] == TYPE_X))
5436 if (GET_CODE (insn) == CALL_INSN)
5438 /* Reject calls in multiway branch packets. We want to limit
5439 the number of multiway branches we generate (since the branch
5440 predictor is limited), and this seems to work fairly well.
5441 (If we didn't do this, we'd have to add another test here to
5442 force calls into the third slot of the bundle.) */
5445 if (p->t[1] == TYPE_B)
5450 if (p->t[4] == TYPE_B)
5458 if (itype == TYPE_A)
5459 return stype == TYPE_M || stype == TYPE_I;
5463 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5464 assembly output a bit prettier. */
5467 ia64_emit_insn_before (insn, before)
5470 rtx prev = PREV_INSN (before);
5471 if (prev && GET_CODE (prev) == INSN
5472 && GET_CODE (PATTERN (prev)) == UNSPEC
5473 && XINT (PATTERN (prev), 1) == 23)
5475 emit_insn_before (insn, before);
5479 /* Generate a nop insn of the given type. Note we never generate L type
5489 return gen_nop_m ();
5491 return gen_nop_i ();
5493 return gen_nop_b ();
5495 return gen_nop_f ();
5497 return gen_nop_x ();
5504 /* When rotating a bundle out of the issue window, insert a bundle selector
5505 insn in front of it. DUMP is the scheduling dump file or NULL. START
5506 is either 0 or 3, depending on whether we want to emit a bundle selector
5507 for the first bundle or the second bundle in the current issue window.
5509 The selector insns are emitted this late because the selected packet can
5510 be changed until parts of it get rotated out. */
5513 finish_last_head (dump, start)
5517 const struct ia64_packet *p = sched_data.packet;
5518 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5519 int bundle_type = b - bundle;
5523 if (! ia64_final_schedule)
5526 for (i = start; sched_data.insns[i] == 0; i++)
5529 insn = sched_data.insns[i];
5532 fprintf (dump, "// Emitting template before %d: %s\n",
5533 INSN_UID (insn), b->name);
5535 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5538 /* We can't schedule more insns this cycle. Fix up the scheduling state
5539 and advance FIRST_SLOT and CUR.
5540 We have to distribute the insns that are currently found between
5541 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5542 far, they are stored successively in the fields starting at FIRST_SLOT;
5543 now they must be moved to the correct slots.
5544 DUMP is the current scheduling dump file, or NULL. */
5547 cycle_end_fill_slots (dump)
5550 const struct ia64_packet *packet = sched_data.packet;
5552 enum attr_type tmp_types[6];
5555 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5556 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5558 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5560 enum attr_type t = tmp_types[i];
5561 if (t != ia64_safe_type (tmp_insns[i]))
5563 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5565 if (slot > sched_data.split)
5568 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5570 sched_data.types[slot] = packet->t[slot];
5571 sched_data.insns[slot] = 0;
5572 sched_data.stopbit[slot] = 0;
5574 /* ??? TYPE_L instructions always fill up two slots, but we don't
5575 support TYPE_L nops. */
5576 if (packet->t[slot] == TYPE_L)
5581 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5582 actual slot type later. */
5583 sched_data.types[slot] = packet->t[slot];
5584 sched_data.insns[slot] = tmp_insns[i];
5585 sched_data.stopbit[slot] = 0;
5587 /* TYPE_L instructions always fill up two slots. */
5592 /* This isn't right - there's no need to pad out until the forced split;
5593 the CPU will automatically split if an insn isn't ready. */
5595 while (slot < sched_data.split)
5597 sched_data.types[slot] = packet->t[slot];
5598 sched_data.insns[slot] = 0;
5599 sched_data.stopbit[slot] = 0;
5604 sched_data.first_slot = sched_data.cur = slot;
5607 /* Bundle rotations, as described in the Itanium optimization manual.
5608 We can rotate either one or both bundles out of the issue window.
5609 DUMP is the current scheduling dump file, or NULL. */
5612 rotate_one_bundle (dump)
5616 fprintf (dump, "// Rotating one bundle.\n");
5618 finish_last_head (dump, 0);
5619 if (sched_data.cur > 3)
5621 sched_data.cur -= 3;
5622 sched_data.first_slot -= 3;
5623 memmove (sched_data.types,
5624 sched_data.types + 3,
5625 sched_data.cur * sizeof *sched_data.types);
5626 memmove (sched_data.stopbit,
5627 sched_data.stopbit + 3,
5628 sched_data.cur * sizeof *sched_data.stopbit);
5629 memmove (sched_data.insns,
5630 sched_data.insns + 3,
5631 sched_data.cur * sizeof *sched_data.insns);
5636 sched_data.first_slot = 0;
5641 rotate_two_bundles (dump)
5645 fprintf (dump, "// Rotating two bundles.\n");
5647 if (sched_data.cur == 0)
5650 finish_last_head (dump, 0);
5651 if (sched_data.cur > 3)
5652 finish_last_head (dump, 3);
5654 sched_data.first_slot = 0;
5657 /* We're beginning a new block. Initialize data structures as necessary. */
5660 ia64_sched_init (dump, sched_verbose, max_ready)
5661 FILE *dump ATTRIBUTE_UNUSED;
5662 int sched_verbose ATTRIBUTE_UNUSED;
5665 static int initialized = 0;
5673 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5675 const struct bundle *t1 = bundle + b1;
5676 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5678 const struct bundle *t2 = bundle + b2;
5684 for (i = 0; i < NR_PACKETS; i++)
5687 for (j = 0; j < 3; j++)
5688 packets[i].t[j] = packets[i].t1->t[j];
5689 for (j = 0; j < 3; j++)
5690 packets[i].t[j + 3] = packets[i].t2->t[j];
5691 packets[i].first_split = itanium_split_issue (packets + i, 0);
5696 init_insn_group_barriers ();
5698 memset (&sched_data, 0, sizeof sched_data);
5699 sched_types = (enum attr_type *) xmalloc (max_ready
5700 * sizeof (enum attr_type));
5701 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5704 /* See if the packet P can match the insns we have already scheduled. Return
5705 nonzero if so. In *PSLOT, we store the first slot that is available for
5706 more instructions if we choose this packet.
5707 SPLIT holds the last slot we can use, there's a split issue after it so
5708 scheduling beyond it would cause us to use more than one cycle. */
5711 packet_matches_p (p, split, pslot)
5712 const struct ia64_packet *p;
5716 int filled = sched_data.cur;
5717 int first = sched_data.first_slot;
5720 /* First, check if the first of the two bundles must be a specific one (due
5722 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5724 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5727 for (i = 0; i < first; i++)
5728 if (! insn_matches_slot (p, sched_data.types[i], i,
5729 sched_data.insns[i]))
5731 for (i = slot = first; i < filled; i++)
5733 while (slot < split)
5735 if (insn_matches_slot (p, sched_data.types[i], slot,
5736 sched_data.insns[i]))
5750 /* A frontend for itanium_split_issue. For a packet P and a slot
5751 number FIRST that describes the start of the current clock cycle,
5752 return the slot number of the first split issue. This function
5753 uses the cached number found in P if possible. */
5756 get_split (p, first)
5757 const struct ia64_packet *p;
5761 return p->first_split;
5762 return itanium_split_issue (p, first);
5765 /* Given N_READY insns in the array READY, whose types are found in the
5766 corresponding array TYPES, return the insn that is best suited to be
5767 scheduled in slot SLOT of packet P. */
5770 find_best_insn (ready, types, n_ready, p, slot)
5772 enum attr_type *types;
5774 const struct ia64_packet *p;
5779 while (n_ready-- > 0)
5781 rtx insn = ready[n_ready];
5784 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5786 /* If we have equally good insns, one of which has a stricter
5787 slot requirement, prefer the one with the stricter requirement. */
5788 if (best >= 0 && types[n_ready] == TYPE_A)
5790 if (insn_matches_slot (p, types[n_ready], slot, insn))
5793 best_pri = INSN_PRIORITY (ready[best]);
5795 /* If there's no way we could get a stricter requirement, stop
5797 if (types[n_ready] != TYPE_A
5798 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5806 /* Select the best packet to use given the current scheduler state and the
5808 READY is an array holding N_READY ready insns; TYPES is a corresponding
5809 array that holds their types. Store the best packet in *PPACKET and the
5810 number of insns that can be scheduled in the current cycle in *PBEST. */
5813 find_best_packet (pbest, ppacket, ready, types, n_ready)
5815 const struct ia64_packet **ppacket;
5817 enum attr_type *types;
5820 int first = sched_data.first_slot;
5823 const struct ia64_packet *best_packet = NULL;
5826 for (i = 0; i < NR_PACKETS; i++)
5828 const struct ia64_packet *p = packets + i;
5830 int split = get_split (p, first);
5832 int first_slot, last_slot;
5835 if (! packet_matches_p (p, split, &first_slot))
5838 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5842 for (slot = first_slot; slot < split; slot++)
5846 /* Disallow a degenerate case where the first bundle doesn't
5847 contain anything but NOPs! */
5848 if (first_slot == 0 && win == 0 && slot == 3)
5854 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5857 sched_ready[insn_nr] = 0;
5861 else if (p->t[slot] == TYPE_B)
5864 /* We must disallow MBB/BBB packets if any of their B slots would be
5865 filled with nops. */
5868 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5873 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5878 || (win == best && last_slot < lowest_end))
5881 lowest_end = last_slot;
5886 *ppacket = best_packet;
5889 /* Reorder the ready list so that the insns that can be issued in this cycle
5890 are found in the correct order at the end of the list.
5891 DUMP is the scheduling dump file, or NULL. READY points to the start,
5892 E_READY to the end of the ready list. MAY_FAIL determines what should be
5893 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5894 otherwise we return 0.
5895 Return 1 if any insns can be scheduled in this cycle. */
5898 itanium_reorder (dump, ready, e_ready, may_fail)
5904 const struct ia64_packet *best_packet;
5905 int n_ready = e_ready - ready;
5906 int first = sched_data.first_slot;
5907 int i, best, best_split, filled;
5909 for (i = 0; i < n_ready; i++)
5910 sched_types[i] = ia64_safe_type (ready[i]);
5912 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5923 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5924 best_packet->t1->name,
5925 best_packet->t2 ? best_packet->t2->name : NULL, best);
5928 best_split = itanium_split_issue (best_packet, first);
5929 packet_matches_p (best_packet, best_split, &filled);
5931 for (i = filled; i < best_split; i++)
5935 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5938 rtx insn = ready[insn_nr];
5939 memmove (ready + insn_nr, ready + insn_nr + 1,
5940 (n_ready - insn_nr - 1) * sizeof (rtx));
5941 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5942 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5943 ready[--n_ready] = insn;
5947 sched_data.packet = best_packet;
5948 sched_data.split = best_split;
5952 /* Dump information about the current scheduling state to file DUMP. */
5955 dump_current_packet (dump)
5959 fprintf (dump, "// %d slots filled:", sched_data.cur);
5960 for (i = 0; i < sched_data.first_slot; i++)
5962 rtx insn = sched_data.insns[i];
5963 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5965 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5966 if (sched_data.stopbit[i])
5967 fprintf (dump, " ;;");
5969 fprintf (dump, " :::");
5970 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5972 rtx insn = sched_data.insns[i];
5973 enum attr_type t = ia64_safe_type (insn);
5974 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5976 fprintf (dump, "\n");
5979 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5983 schedule_stop (dump)
5986 const struct ia64_packet *best = sched_data.packet;
5991 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5993 if (sched_data.cur == 0)
5996 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5998 rotate_two_bundles (NULL);
6002 for (i = -1; i < NR_PACKETS; i++)
6004 /* This is a slight hack to give the current packet the first chance.
6005 This is done to avoid e.g. switching from MIB to MBB bundles. */
6006 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6007 int split = get_split (p, sched_data.first_slot);
6008 const struct bundle *compare;
6011 if (! packet_matches_p (p, split, &next))
6014 compare = next > 3 ? p->t2 : p->t1;
6017 if (compare->possible_stop)
6018 stoppos = compare->possible_stop;
6022 if (stoppos < next || stoppos >= best_stop)
6024 if (compare->possible_stop == 0)
6026 stoppos = (next > 3 ? 6 : 3);
6028 if (stoppos < next || stoppos >= best_stop)
6032 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6033 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6036 best_stop = stoppos;
6040 sched_data.packet = best;
6041 cycle_end_fill_slots (dump);
6042 while (sched_data.cur < best_stop)
6044 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6045 sched_data.insns[sched_data.cur] = 0;
6046 sched_data.stopbit[sched_data.cur] = 0;
6049 sched_data.stopbit[sched_data.cur - 1] = 1;
6050 sched_data.first_slot = best_stop;
6053 dump_current_packet (dump);
6056 /* If necessary, perform one or two rotations on the scheduling state.
6057 This should only be called if we are starting a new cycle. */
6063 if (sched_data.cur == 6)
6064 rotate_two_bundles (dump);
6065 else if (sched_data.cur >= 3)
6066 rotate_one_bundle (dump);
6067 sched_data.first_slot = sched_data.cur;
6070 /* The clock cycle when ia64_sched_reorder was last called. */
6071 static int prev_cycle;
6073 /* The first insn scheduled in the previous cycle. This is the saved
6074 value of sched_data.first_slot. */
6075 static int prev_first;
6077 /* The last insn that has been scheduled. At the start of a new cycle
6078 we know that we can emit new insns after it; the main scheduling code
6079 has already emitted a cycle_display insn after it and is using that
6080 as its current last insn. */
6081 static rtx last_issued;
6083 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6084 pad out the delay between MM (shifts, etc.) and integer operations. */
6087 nop_cycles_until (clock_var, dump)
6091 int prev_clock = prev_cycle;
6092 int cycles_left = clock_var - prev_clock;
6094 /* Finish the previous cycle; pad it out with NOPs. */
6095 if (sched_data.cur == 3)
6097 rtx t = gen_insn_group_barrier (GEN_INT (3));
6098 last_issued = emit_insn_after (t, last_issued);
6099 maybe_rotate (dump);
6101 else if (sched_data.cur > 0)
6104 int split = itanium_split_issue (sched_data.packet, prev_first);
6106 if (sched_data.cur < 3 && split > 3)
6112 if (split > sched_data.cur)
6115 for (i = sched_data.cur; i < split; i++)
6119 t = gen_nop_type (sched_data.packet->t[i]);
6120 last_issued = emit_insn_after (t, last_issued);
6121 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6122 sched_data.insns[i] = last_issued;
6123 sched_data.stopbit[i] = 0;
6125 sched_data.cur = split;
6128 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6132 for (i = sched_data.cur; i < 6; i++)
6136 t = gen_nop_type (sched_data.packet->t[i]);
6137 last_issued = emit_insn_after (t, last_issued);
6138 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6139 sched_data.insns[i] = last_issued;
6140 sched_data.stopbit[i] = 0;
6147 if (need_stop || sched_data.cur == 6)
6149 rtx t = gen_insn_group_barrier (GEN_INT (3));
6150 last_issued = emit_insn_after (t, last_issued);
6152 maybe_rotate (dump);
6156 while (cycles_left > 0)
6158 rtx t = gen_bundle_selector (GEN_INT (0));
6159 last_issued = emit_insn_after (t, last_issued);
6160 t = gen_nop_type (TYPE_M);
6161 last_issued = emit_insn_after (t, last_issued);
6162 t = gen_nop_type (TYPE_I);
6163 last_issued = emit_insn_after (t, last_issued);
6164 if (cycles_left > 1)
6166 t = gen_insn_group_barrier (GEN_INT (2));
6167 last_issued = emit_insn_after (t, last_issued);
6170 t = gen_nop_type (TYPE_I);
6171 last_issued = emit_insn_after (t, last_issued);
6172 t = gen_insn_group_barrier (GEN_INT (3));
6173 last_issued = emit_insn_after (t, last_issued);
6178 /* We are about to being issuing insns for this clock cycle.
6179 Override the default sort algorithm to better slot instructions. */
6182 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6183 reorder_type, clock_var)
6184 FILE *dump ATTRIBUTE_UNUSED;
6185 int sched_verbose ATTRIBUTE_UNUSED;
6188 int reorder_type, clock_var;
6191 int n_ready = *pn_ready;
6192 rtx *e_ready = ready + n_ready;
6197 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6198 dump_current_packet (dump);
6201 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6203 for (insnp = ready; insnp < e_ready; insnp++)
6206 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6207 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6208 || t == ITANIUM_CLASS_ILOG
6209 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6212 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6213 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6214 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6216 rtx other = XEXP (link, 0);
6217 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6218 if (t0 == ITANIUM_CLASS_MMSHF
6219 || t0 == ITANIUM_CLASS_MMMUL)
6221 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6230 prev_first = sched_data.first_slot;
6231 prev_cycle = clock_var;
6233 if (reorder_type == 0)
6234 maybe_rotate (sched_verbose ? dump : NULL);
6236 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6238 for (insnp = ready; insnp < e_ready; insnp++)
6239 if (insnp < e_ready)
6242 enum attr_type t = ia64_safe_type (insn);
6243 if (t == TYPE_UNKNOWN)
6245 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6246 || asm_noperands (PATTERN (insn)) >= 0)
6248 rtx lowest = ready[n_asms];
6249 ready[n_asms] = insn;
6255 rtx highest = ready[n_ready - 1];
6256 ready[n_ready - 1] = insn;
6258 if (ia64_final_schedule && group_barrier_needed_p (insn))
6260 schedule_stop (sched_verbose ? dump : NULL);
6261 sched_data.last_was_stop = 1;
6262 maybe_rotate (sched_verbose ? dump : NULL);
6269 if (n_asms < n_ready)
6271 /* Some normal insns to process. Skip the asms. */
6275 else if (n_ready > 0)
6277 /* Only asm insns left. */
6278 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6280 schedule_stop (sched_verbose ? dump : NULL);
6281 sched_data.last_was_stop = 1;
6282 maybe_rotate (sched_verbose ? dump : NULL);
6284 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6288 if (ia64_final_schedule)
6290 int nr_need_stop = 0;
6292 for (insnp = ready; insnp < e_ready; insnp++)
6293 if (safe_group_barrier_needed_p (*insnp))
6296 /* Schedule a stop bit if
6297 - all insns require a stop bit, or
6298 - we are starting a new cycle and _any_ insns require a stop bit.
6299 The reason for the latter is that if our schedule is accurate, then
6300 the additional stop won't decrease performance at this point (since
6301 there's a split issue at this point anyway), but it gives us more
6302 freedom when scheduling the currently ready insns. */
6303 if ((reorder_type == 0 && nr_need_stop)
6304 || (reorder_type == 1 && n_ready == nr_need_stop))
6306 schedule_stop (sched_verbose ? dump : NULL);
6307 sched_data.last_was_stop = 1;
6308 maybe_rotate (sched_verbose ? dump : NULL);
6309 if (reorder_type == 1)
6316 /* Move down everything that needs a stop bit, preserving relative
6318 while (insnp-- > ready + deleted)
6319 while (insnp >= ready + deleted)
6322 if (! safe_group_barrier_needed_p (insn))
6324 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6330 if (deleted != nr_need_stop)
6335 return itanium_reorder (sched_verbose ? dump : NULL,
6336 ready, e_ready, reorder_type == 1);
6340 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6347 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6348 pn_ready, 0, clock_var);
6351 /* Like ia64_sched_reorder, but called after issuing each insn.
6352 Override the default sort algorithm to better slot instructions. */
6355 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6356 FILE *dump ATTRIBUTE_UNUSED;
6357 int sched_verbose ATTRIBUTE_UNUSED;
6362 if (sched_data.last_was_stop)
6365 /* Detect one special case and try to optimize it.
6366 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6367 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6368 if (sched_data.first_slot == 1
6369 && sched_data.stopbit[0]
6370 && ((sched_data.cur == 4
6371 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6372 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6373 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6374 || (sched_data.cur == 3
6375 && (sched_data.types[1] == TYPE_M
6376 || sched_data.types[1] == TYPE_A)
6377 && (sched_data.types[2] != TYPE_M
6378 && sched_data.types[2] != TYPE_I
6379 && sched_data.types[2] != TYPE_A))))
6383 rtx stop = sched_data.insns[1];
6385 /* Search backward for the stop bit that must be there. */
6390 stop = PREV_INSN (stop);
6391 if (GET_CODE (stop) != INSN)
6393 insn_code = recog_memoized (stop);
6395 /* Ignore cycle displays and .pred.rel.mutex. */
6396 if (insn_code == CODE_FOR_cycle_display
6397 || insn_code == CODE_FOR_pred_rel_mutex
6398 || insn_code == CODE_FOR_prologue_use)
6401 if (insn_code == CODE_FOR_insn_group_barrier)
6406 /* Adjust the stop bit's slot selector. */
6407 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6409 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6411 sched_data.stopbit[0] = 0;
6412 sched_data.stopbit[2] = 1;
6414 sched_data.types[5] = sched_data.types[3];
6415 sched_data.types[4] = sched_data.types[2];
6416 sched_data.types[3] = sched_data.types[1];
6417 sched_data.insns[5] = sched_data.insns[3];
6418 sched_data.insns[4] = sched_data.insns[2];
6419 sched_data.insns[3] = sched_data.insns[1];
6420 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6421 sched_data.cur += 2;
6422 sched_data.first_slot = 3;
6423 for (i = 0; i < NR_PACKETS; i++)
6425 const struct ia64_packet *p = packets + i;
6426 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6428 sched_data.packet = p;
6432 rotate_one_bundle (sched_verbose ? dump : NULL);
6435 for (i = 0; i < NR_PACKETS; i++)
6437 const struct ia64_packet *p = packets + i;
6438 int split = get_split (p, sched_data.first_slot);
6441 /* Disallow multiway branches here. */
6442 if (p->t[1] == TYPE_B)
6445 if (packet_matches_p (p, split, &next) && next < best)
6448 sched_data.packet = p;
6449 sched_data.split = split;
6458 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6463 /* Did we schedule a stop? If so, finish this cycle. */
6464 if (sched_data.cur == sched_data.first_slot)
6469 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6471 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6473 dump_current_packet (dump);
6477 /* We are about to issue INSN. Return the number of insns left on the
6478 ready queue that can be issued this cycle. */
6481 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6485 int can_issue_more ATTRIBUTE_UNUSED;
6487 enum attr_type t = ia64_safe_type (insn);
6491 if (sched_data.last_was_stop)
6493 int t = sched_data.first_slot;
6496 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6497 init_insn_group_barriers ();
6498 sched_data.last_was_stop = 0;
6501 if (t == TYPE_UNKNOWN)
6504 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6505 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6506 || asm_noperands (PATTERN (insn)) >= 0)
6508 /* This must be some kind of asm. Clear the scheduling state. */
6509 rotate_two_bundles (sched_verbose ? dump : NULL);
6510 if (ia64_final_schedule)
6511 group_barrier_needed_p (insn);
6516 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6517 important state info. Don't delete this test. */
6518 if (ia64_final_schedule
6519 && group_barrier_needed_p (insn))
6522 sched_data.stopbit[sched_data.cur] = 0;
6523 sched_data.insns[sched_data.cur] = insn;
6524 sched_data.types[sched_data.cur] = t;
6528 fprintf (dump, "// Scheduling insn %d of type %s\n",
6529 INSN_UID (insn), type_names[t]);
6531 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6533 schedule_stop (sched_verbose ? dump : NULL);
6534 sched_data.last_was_stop = 1;
6540 /* Free data allocated by ia64_sched_init. */
6543 ia64_sched_finish (dump, sched_verbose)
6548 fprintf (dump, "// Finishing schedule.\n");
6549 rotate_two_bundles (NULL);
6555 ia64_cycle_display (clock, last)
6559 if (ia64_final_schedule)
6560 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6565 /* Emit pseudo-ops for the assembler to describe predicate relations.
6566 At present this assumes that we only consider predicate pairs to
6567 be mutex, and that the assembler can deduce proper values from
6568 straight-line code. */
6571 emit_predicate_relation_info ()
6575 for (i = n_basic_blocks - 1; i >= 0; --i)
6577 basic_block bb = BASIC_BLOCK (i);
6579 rtx head = bb->head;
6581 /* We only need such notes at code labels. */
6582 if (GET_CODE (head) != CODE_LABEL)
6584 if (GET_CODE (NEXT_INSN (head)) == NOTE
6585 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6586 head = NEXT_INSN (head);
6588 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6589 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6591 rtx p = gen_rtx_REG (BImode, r);
6592 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6593 if (head == bb->end)
6599 /* Look for conditional calls that do not return, and protect predicate
6600 relations around them. Otherwise the assembler will assume the call
6601 returns, and complain about uses of call-clobbered predicates after
6603 for (i = n_basic_blocks - 1; i >= 0; --i)
6605 basic_block bb = BASIC_BLOCK (i);
6606 rtx insn = bb->head;
6610 if (GET_CODE (insn) == CALL_INSN
6611 && GET_CODE (PATTERN (insn)) == COND_EXEC
6612 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6614 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6615 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6616 if (bb->head == insn)
6618 if (bb->end == insn)
6622 if (insn == bb->end)
6624 insn = NEXT_INSN (insn);
6629 /* Generate a NOP instruction of type T. We will never generate L type
6639 return gen_nop_m ();
6641 return gen_nop_i ();
6643 return gen_nop_b ();
6645 return gen_nop_f ();
6647 return gen_nop_x ();
6653 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6654 here than while scheduling. */
6660 const struct bundle *b = 0;
6663 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6667 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6668 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6670 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6671 || GET_CODE (insn) == CODE_LABEL)
6674 while (bundle_pos < 3)
6676 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6679 if (GET_CODE (insn) != CODE_LABEL)
6680 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6686 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6688 int t = INTVAL (XVECEXP (pat, 0, 0));
6690 while (bundle_pos < t)
6692 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6698 if (bundle_pos == 3)
6701 if (b && INSN_P (insn))
6703 t = ia64_safe_type (insn);
6704 if (asm_noperands (PATTERN (insn)) >= 0
6705 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6707 while (bundle_pos < 3)
6709 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6715 if (t == TYPE_UNKNOWN)
6717 while (bundle_pos < 3)
6719 if (t == b->t[bundle_pos]
6720 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6721 || b->t[bundle_pos] == TYPE_I)))
6724 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6733 /* Perform machine dependent operations on the rtl chain INSNS. */
6739 /* If optimizing, we'll have split before scheduling. */
6741 split_all_insns_noflow ();
6743 /* Make sure the CFG and global_live_at_start are correct
6744 for emit_predicate_relation_info. */
6745 find_basic_blocks (insns, max_reg_num (), NULL);
6746 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6748 if (ia64_flag_schedule_insns2)
6750 timevar_push (TV_SCHED2);
6751 ia64_final_schedule = 1;
6752 schedule_ebbs (rtl_dump_file);
6753 ia64_final_schedule = 0;
6754 timevar_pop (TV_SCHED2);
6756 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6757 place as they were during scheduling. */
6758 emit_insn_group_barriers (rtl_dump_file, insns);
6762 emit_all_insn_group_barriers (rtl_dump_file, insns);
6764 /* A call must not be the last instruction in a function, so that the
6765 return address is still within the function, so that unwinding works
6766 properly. Note that IA-64 differs from dwarf2 on this point. */
6767 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6772 insn = get_last_insn ();
6773 if (! INSN_P (insn))
6774 insn = prev_active_insn (insn);
6775 if (GET_CODE (insn) == INSN
6776 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6777 && XINT (PATTERN (insn), 1) == 2)
6780 insn = prev_active_insn (insn);
6782 if (GET_CODE (insn) == CALL_INSN)
6785 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6786 emit_insn (gen_break_f ());
6787 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6792 emit_predicate_relation_info ();
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  switch (regno)
    {
    case GR_REG (1):
      /* When a function makes a call through a function descriptor, we
         will write a (potentially) new value to "gp".  After returning
         from such a call, we need to make sure the function restores the
         original gp-value, even if the function itself does not use the
         gp anymore.  */
      return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
               TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case BR_REG (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Table of valid machine attributes.  */
const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { NULL,              0, 0, false, false, false, NULL }
};
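/* Illustrative use (hypothetical declaration, not part of this file):

       extern long my_restartable_syscall (long, long)
            __attribute__ ((syscall_linkage));

   The attribute takes no arguments and applies to function types; see
   ia64_epilogue_uses above for its effect on the input registers.  */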
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl/ld8/ld8 makes the code bigger, but should make the
   code faster because there is one less load.  This would also cover
   incomplete types which can't go in sdata/sbss.  */

/* ??? See select_section.  We must put short own readonly variables in
   sdata/sbss instead of the more natural rodata, because we can't perform
   the DECL_READONLY_SECTION test here.  */
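/* Illustrative example (not part of this file): assuming a variable whose
   size is within ia64_section_threshold,

       static int counter;

   has its SYMBOL_REF name rewritten to "@counter", and references can
   then be emitted as a single gp-relative access:

       addl r14 = @gprel(counter), gp
       ;;
       ld4 r8 = [r14]

   instead of first loading the address from the linkage table.  */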
extern struct obstack * saveable_obstack;

void
ia64_encode_section_info (decl)
     tree decl;
{
  const char *symbol_str;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  /* We assume that -fpic is used only to create a shared library (dso).
     With -fpic, no global data can ever be sdata.
     Without -fpic, global common uninitialized data can never be sdata, since
     it can unify with a real definition in a dso.  */
  /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
     to access them.  The linker may then be able to do linker relaxation to
     optimize references to them.  Currently sdata implies use of gprel.  */
  /* We need the DECL_EXTERNAL check for C++.  static class data members get
     both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
     statically allocated, but the space is allocated somewhere else.  Such
     decls can not be own data.  */
  if (! TARGET_NO_SDATA
      && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
      && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
      && ! (TREE_PUBLIC (decl)
            && (flag_pic
                || (DECL_COMMON (decl)
                    && (DECL_INITIAL (decl) == 0
                        || DECL_INITIAL (decl) == error_mark_node))))
      /* Either the variable must be declared without a section attribute,
         or the section must be sdata or sbss.  */
      && (DECL_SECTION_NAME (decl) == 0
          || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
                       ".sdata")
          || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
                       ".sbss")))
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      /* If the variable has already been defined in the output file, then it
         is too late to put it in sdata if it wasn't put there in the first
         place.  The test is here rather than above, because if it is already
         in sdata, then it can stay there.  */
      if (TREE_ASM_WRITTEN (decl))
        ;

      /* If this is an incomplete type with size 0, then we can't put it in
         sdata because it might be too big when completed.  */
      else if (size > 0
               && size <= (HOST_WIDE_INT) ia64_section_threshold
               && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
        {
          size_t len = strlen (symbol_str);
          /* Room for the flag character plus the name and the terminating
             NUL that the memcpy below copies as well.  */
          char *newstr = alloca (len + 2);
          const char *string;

          *newstr = SDATA_NAME_FLAG_CHAR;
          memcpy (newstr + 1, symbol_str, len + 1);

          string = ggc_alloc_string (newstr, len + 1);
          XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
        }
    }
  /* This decl is marked as being in small data/bss but it shouldn't
     be; one likely explanation for this is that the decl has been
     moved into a different section from the one it was in when
     ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    XSTR (XEXP (DECL_RTL (decl), 0), 0)
      = ggc_strdup (symbol_str + 1);
}
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */
static int block_num;

/* True if we need a copy_state command at the start of the next block.  */
static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */
  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
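/* Schematically (illustrative), an epilogue in the middle of a function
   is therefore bracketed as:

        .label_state 1
        .restore sp
        ...br.ret...
        .body
        .copy_state 1

   where the .body/.copy_state pair is emitted at the start of the next
   basic block by process_for_unwind_directive below.  */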
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
         shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
        abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
               ia64_dbx_register_number (dest_regno));
      return 1;
    }
  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);
          if (op0 == dest && GET_CODE (op1) == CONST_INT)
            {
              if (INTVAL (op1) < 0)
                {
                  fputs ("\t.fframe ", asm_out_file);
                  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
                           -INTVAL (op1));
                  fputc ('\n', asm_out_file);
                }
              else
                process_epilogue ();
            }
          else
            abort ();
        }
      else if (GET_CODE (src) == REG
               && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
        process_epilogue ();
      else
        abort ();

      return 1;
    }
7040 /* Register move we need to look at. */
7041 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7043 src_regno = REGNO (src);
7044 dest_regno = REGNO (dest);
7049 /* Saving return address pointer. */
7050 if (dest_regno != current_frame_info.reg_save_b0)
7052 fprintf (asm_out_file, "\t.save rp, r%d\n",
7053 ia64_dbx_register_number (dest_regno));
7057 if (dest_regno != current_frame_info.reg_save_pr)
7059 fprintf (asm_out_file, "\t.save pr, r%d\n",
7060 ia64_dbx_register_number (dest_regno));
7063 case AR_UNAT_REGNUM:
7064 if (dest_regno != current_frame_info.reg_save_ar_unat)
7066 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7067 ia64_dbx_register_number (dest_regno));
7071 if (dest_regno != current_frame_info.reg_save_ar_lc)
7073 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7074 ia64_dbx_register_number (dest_regno));
7077 case STACK_POINTER_REGNUM:
7078 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7079 || ! frame_pointer_needed)
7081 fprintf (asm_out_file, "\t.vframe r%d\n",
7082 ia64_dbx_register_number (dest_regno));
7086 /* Everything else should indicate being stored to memory. */
  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      rtx base;
      long off;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
        {
          base = XEXP (dest, 0);
          off = 0;
        }
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
        {
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
        }
      else
        abort ();

      if (base == hard_frame_pointer_rtx)
        {
          saveop = ".savepsp";
          off = - off;
        }
      else if (base == stack_pointer_rtx)
        saveop = ".savesp";
      else
        abort ();

      src_regno = REGNO (src);
      switch (src_regno)
        {
        case BR_REG (0):
          if (current_frame_info.reg_save_b0 != 0)
            abort ();
          fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
          return 1;

        case PR_REG (0):
          if (current_frame_info.reg_save_pr != 0)
            abort ();
          fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
          return 1;

        case AR_LC_REGNUM:
          if (current_frame_info.reg_save_ar_lc != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
          return 1;

        case AR_PFS_REGNUM:
          if (current_frame_info.reg_save_ar_pfs != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
          return 1;

        case AR_UNAT_REGNUM:
          if (current_frame_info.reg_save_ar_unat != 0)
            abort ();
          fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
          return 1;

        case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
          fprintf (asm_out_file, "\t.save.g 0x%x\n",
                   1 << (src_regno - GR_REG (4)));
          return 1;

        case BR_REG (1): case BR_REG (2): case BR_REG (3):
        case BR_REG (4): case BR_REG (5):
          fprintf (asm_out_file, "\t.save.b 0x%x\n",
                   1 << (src_regno - BR_REG (1)));
          return 1;

        case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
          fprintf (asm_out_file, "\t.save.f 0x%x\n",
                   1 << (src_regno - FR_REG (2)));
          return 1;

        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                   1 << (src_regno - FR_REG (12)));
          return 1;

        default:
          return 0;
        }
    }

  return 0;
}
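/* Putting it together (illustrative output, register numbers hypothetical),
   a frame-related prologue sequence such as

        alloc r35 = ar.pfs, 2, 4, 0, 0
        adds r12 = -16, r12
        mov r33 = b0

   would be annotated as

        .save ar.pfs, r35
        alloc r35 = ar.pfs, 2, 4, 0, 0
        .fframe 16
        adds r12 = -16, r12
        .save rp, r33
        mov r33 = b0
*/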
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          block_num = NOTE_BASIC_BLOCK (insn)->index;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
            {
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state 1\n");
              need_copy_state = 0;
            }
        }

      if (! RTX_FRAME_RELATED_P (insn))
        return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
        pat = XEXP (pat, 0);
      else
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
        {
        case SET:
          process_set (asm_out_file, pat);
          break;

        case PARALLEL:
          {
            int par_index;
            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
              {
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x);
              }
            break;
          }

        default:
          abort ();
        }
    }
}
void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, psi_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            long_integer_type_node,
                                                            endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, psi_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE, long_integer_type_node,
                                                 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, psi_type_node, endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdi_type_node, endlink));
#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, endlink),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, endlink),
               IA64_BUILTIN_FLUSHRS);
  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);
}
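/* Illustrative use of the builtins defined above (not part of this file):

       static int lock;
       ...
       while (__sync_lock_test_and_set_si (&lock, 1))
         continue;
       ... critical section ...
       __sync_lock_release_si (&lock);

   The acquire side expands to an atomic exchange (see
   ia64_expand_lock_test_and_set below); the release side is a simple
   release store of zero (see ia64_expand_lock_release below).  */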
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/
static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }

  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/
static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }

  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv

   For bool_ it's the same except return ret == oldval.  */
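/* Illustrative use through the user-level builtin (not part of this file):

       int old, new;
       do
         {
           old = *counter;
           new = old + 1;
         }
       while (! __sync_bool_compare_and_swap_si (counter, old, new));

   where `counter' is an int *.  */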
static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
/* On HP-UX IA64, aggregate parameters are passed in the most significant
   bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case for structures/unions/etc.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with BYTES_BIG_ENDIAN
     hardwired to be true.  */
  return ((mode == BLKmode
           ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
              && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
           : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
          ? downward : upward);
}
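/* For example (illustrative): on big-endian HP-UX a 3-byte aggregate is
   padded upward, so its bytes occupy the start (the most significant end)
   of its stack slot, while a short (HImode) argument falls through to the
   standard rule above and is padded downward.  */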