1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "sched-int.h"
48 #include "target-def.h"
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
/* Assembler names for the 96 stacked registers r32..r127, indexed from 0
   (== r32).  */
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int ia64_flag_schedule_insns2;
102 /* Variables which are this size or smaller are put in the sdata/sbss
   sections (size threshold in bytes; compared against by
   sdata_symbolic_operand below).  */
105 unsigned int ia64_section_threshold;
/* Forward declarations for file-local helpers, old-style PARAMS macro for
   pre-ISO compilers.  NOTE(review): this listing is incomplete -- several
   multi-line parameter lists below are missing their continuation lines.  */
107 static int find_gr_spill PARAMS ((int));
108 static int next_scratch_gr_reg PARAMS ((void));
109 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
110 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
111 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
112 static void finish_spill_pointers PARAMS ((void));
113 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
114 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
115 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
116 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
117 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
118 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
120 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
121 static void fix_range PARAMS ((const char *));
122 static void ia64_add_gc_roots PARAMS ((void));
123 static void ia64_init_machine_status PARAMS ((struct function *));
124 static void ia64_mark_machine_status PARAMS ((struct function *));
125 static void ia64_free_machine_status PARAMS ((struct function *));
126 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
127 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
128 static void emit_predicate_relation_info PARAMS ((void));
129 static void process_epilogue PARAMS ((void));
130 static int process_set PARAMS ((FILE *, rtx));
/* Builtin expanders for the atomic/sync intrinsics (parameter lists are
   truncated in this listing).  */
132 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
134 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
136 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
138 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
140 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
141 const struct attribute_spec ia64_attribute_table[];
/* Targetm hooks: assembly output and instruction scheduling.  */
142 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
143 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
144 static void ia64_output_function_end_prologue PARAMS ((FILE *));
146 static int ia64_issue_rate PARAMS ((void));
147 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
148 static void ia64_sched_init PARAMS ((FILE *, int, int));
149 static void ia64_sched_finish PARAMS ((FILE *, int));
150 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
152 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
153 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
154 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
155 static rtx ia64_cycle_display PARAMS ((int, rtx));
158 /* Initialize the GCC target structure. */
/* Each hook macro is #undef'd first to override the defaults supplied by
   target-def.h before targetm is instantiated below.  */
159 #undef TARGET_ATTRIBUTE_TABLE
160 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
162 #undef TARGET_INIT_BUILTINS
163 #define TARGET_INIT_BUILTINS ia64_init_builtins
165 #undef TARGET_EXPAND_BUILTIN
166 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
168 #undef TARGET_ASM_FUNCTION_PROLOGUE
169 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
170 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
171 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
172 #undef TARGET_ASM_FUNCTION_EPILOGUE
173 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
175 #undef TARGET_SCHED_ADJUST_COST
176 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
177 #undef TARGET_SCHED_ISSUE_RATE
178 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
179 #undef TARGET_SCHED_VARIABLE_ISSUE
180 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
181 #undef TARGET_SCHED_INIT
182 #define TARGET_SCHED_INIT ia64_sched_init
183 #undef TARGET_SCHED_FINISH
184 #define TARGET_SCHED_FINISH ia64_sched_finish
185 #undef TARGET_SCHED_REORDER
186 #define TARGET_SCHED_REORDER ia64_sched_reorder
187 #undef TARGET_SCHED_REORDER2
188 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
189 #undef TARGET_SCHED_CYCLE_DISPLAY
190 #define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display
/* The one target vector instance for this back end.  */
192 struct gcc_target targetm = TARGET_INITIALIZER;
194 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
/* NOTE(review): function bodies in this region are missing lines
   (return types, `rtx op;' declarations, braces, switch/case labels) --
   the listing is a sampled excerpt; do not compile as-is.  */
197 call_operand (op, mode)
199 enum machine_mode mode;
201 if (mode != GET_MODE (op))
204 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
205 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
208 /* Return 1 if OP refers to a symbol in the sdata section. */
211 sdata_symbolic_operand (op, mode)
213 enum machine_mode mode ATTRIBUTE_UNUSED;
215 switch (GET_CODE (op))
/* CONST case: accept only (const (plus (symbol_ref ...) ...)).  */
218 if (GET_CODE (XEXP (op, 0)) != PLUS
219 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
221 op = XEXP (XEXP (op, 0), 0);
/* SYMBOL_REF case: constant-pool entries qualify when small enough;
   other symbols are flagged by a leading SDATA_NAME_FLAG_CHAR.  */
225 if (CONSTANT_POOL_ADDRESS_P (op))
226 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
228 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
237 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
240 got_symbolic_operand (op, mode)
242 enum machine_mode mode ATTRIBUTE_UNUSED;
244 switch (GET_CODE (op))
248 if (GET_CODE (op) != PLUS)
250 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
253 if (GET_CODE (op) != CONST_INT)
258 /* Ok if we're not using GOT entries at all. */
259 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
262 /* "Ok" while emitting rtl, since otherwise we won't be provided
263 with the entire offset during emission, which makes it very
264 hard to split the offset into high and low parts. */
265 if (rtx_equal_function_value_matters)
268 /* Force the low 14 bits of the constant to zero so that we do not
269 use up so many GOT entries. */
270 return (INTVAL (op) & 0x3fff) == 0;
282 /* Return 1 if OP refers to a symbol. */
285 symbolic_operand (op, mode)
287 enum machine_mode mode ATTRIBUTE_UNUSED;
289 switch (GET_CODE (op))
302 /* Return 1 if OP refers to a function. */
/* SYMBOL_REF_FLAG is set by the back end on function symbols.  */
305 function_operand (op, mode)
307 enum machine_mode mode ATTRIBUTE_UNUSED;
309 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
315 /* Return 1 if OP is setjmp or a similar function. */
317 /* ??? This is an unsatisfying solution. Should rethink. */
320 setjmp_operand (op, mode)
322 enum machine_mode mode ATTRIBUTE_UNUSED;
327 if (GET_CODE (op) != SYMBOL_REF)
332 /* The following code is borrowed from special_function_p in calls.c. */
334 /* Disregard prefix _, __ or __x. */
337 if (name[1] == '_' && name[2] == 'x')
339 else if (name[1] == '_')
/* Match setjmp-family names by string comparison (guards on the first
   characters are elided in this listing).  */
349 && (! strcmp (name, "setjmp")
350 || ! strcmp (name, "setjmp_syscall")))
352 && ! strcmp (name, "sigsetjmp"))
354 && ! strcmp (name, "savectx")));
356 else if ((name[0] == 'q' && name[1] == 's'
357 && ! strcmp (name, "qsetjmp"))
358 || (name[0] == 'v' && name[1] == 'f'
359 && ! strcmp (name, "vfork")))
365 /* Return 1 if OP is a general operand, but when pic exclude symbolic
368 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
369 from PREDICATE_CODES. */
372 move_operand (op, mode)
374 enum machine_mode mode;
376 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
379 return general_operand (op, mode);
/* The six predicates below share one pattern: strip a SUBREG, then for a
   hard register check membership in the GR and/or FR register class;
   pseudos (regno >= FIRST_PSEUDO_REGISTER) are accepted implicitly since
   they could be allocated to the class.  Bodies are missing lines
   (braces, `return 0;' fallbacks) in this sampled listing.  */
382 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
385 gr_register_operand (op, mode)
387 enum machine_mode mode;
389 if (! register_operand (op, mode))
391 if (GET_CODE (op) == SUBREG)
392 op = SUBREG_REG (op);
393 if (GET_CODE (op) == REG)
395 unsigned int regno = REGNO (op);
396 if (regno < FIRST_PSEUDO_REGISTER)
397 return GENERAL_REGNO_P (regno);
402 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
405 fr_register_operand (op, mode)
407 enum machine_mode mode;
409 if (! register_operand (op, mode))
411 if (GET_CODE (op) == SUBREG)
412 op = SUBREG_REG (op);
413 if (GET_CODE (op) == REG)
415 unsigned int regno = REGNO (op);
416 if (regno < FIRST_PSEUDO_REGISTER)
417 return FR_REGNO_P (regno);
422 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
425 grfr_register_operand (op, mode)
427 enum machine_mode mode;
429 if (! register_operand (op, mode))
431 if (GET_CODE (op) == SUBREG)
432 op = SUBREG_REG (op);
433 if (GET_CODE (op) == REG)
435 unsigned int regno = REGNO (op);
436 if (regno < FIRST_PSEUDO_REGISTER)
437 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
442 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
445 gr_nonimmediate_operand (op, mode)
447 enum machine_mode mode;
449 if (! nonimmediate_operand (op, mode))
451 if (GET_CODE (op) == SUBREG)
452 op = SUBREG_REG (op);
453 if (GET_CODE (op) == REG)
455 unsigned int regno = REGNO (op);
456 if (regno < FIRST_PSEUDO_REGISTER)
457 return GENERAL_REGNO_P (regno);
462 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
465 fr_nonimmediate_operand (op, mode)
467 enum machine_mode mode;
469 if (! nonimmediate_operand (op, mode))
471 if (GET_CODE (op) == SUBREG)
472 op = SUBREG_REG (op);
473 if (GET_CODE (op) == REG)
475 unsigned int regno = REGNO (op);
476 if (regno < FIRST_PSEUDO_REGISTER)
477 return FR_REGNO_P (regno);
482 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
485 grfr_nonimmediate_operand (op, mode)
487 enum machine_mode mode;
489 if (! nonimmediate_operand (op, mode))
491 if (GET_CODE (op) == SUBREG)
492 op = SUBREG_REG (op);
493 if (GET_CODE (op) == REG)
495 unsigned int regno = REGNO (op);
496 if (regno < FIRST_PSEUDO_REGISTER)
497 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
/* Predicates accepting a register or an immediate of a particular IA-64
   encoding width.  The CONST_OK_FOR_<letter> macros correspond to the
   machine's constraint letters (per the comments below: M = 6-bit shift
   count, K = 8-bit, L = 8-bit adjusted, I = 14-bit, J = 22-bit).
   CONSTANT_P_RTX is accepted so __builtin_constant_p folds late.  */
502 /* Return 1 if OP is a GR register operand, or zero. */
505 gr_reg_or_0_operand (op, mode)
507 enum machine_mode mode;
509 return (op == const0_rtx || gr_register_operand (op, mode));
512 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
515 gr_reg_or_5bit_operand (op, mode)
517 enum machine_mode mode;
519 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
520 || GET_CODE (op) == CONSTANT_P_RTX
521 || gr_register_operand (op, mode));
524 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
527 gr_reg_or_6bit_operand (op, mode)
529 enum machine_mode mode;
531 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
532 || GET_CODE (op) == CONSTANT_P_RTX
533 || gr_register_operand (op, mode));
536 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
539 gr_reg_or_8bit_operand (op, mode)
541 enum machine_mode mode;
543 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
544 || GET_CODE (op) == CONSTANT_P_RTX
545 || gr_register_operand (op, mode));
548 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
551 grfr_reg_or_8bit_operand (op, mode)
553 enum machine_mode mode;
555 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
556 || GET_CODE (op) == CONSTANT_P_RTX
557 || grfr_register_operand (op, mode));
560 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
564 gr_reg_or_8bit_adjusted_operand (op, mode)
566 enum machine_mode mode;
568 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
569 || GET_CODE (op) == CONSTANT_P_RTX
570 || gr_register_operand (op, mode));
573 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
574 immediate and an 8 bit adjusted immediate operand. This is necessary
575 because when we emit a compare, we don't know what the condition will be,
576 so we need the union of the immediates accepted by GT and LT. */
579 gr_reg_or_8bit_and_adjusted_operand (op, mode)
581 enum machine_mode mode;
583 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
584 && CONST_OK_FOR_L (INTVAL (op)))
585 || GET_CODE (op) == CONSTANT_P_RTX
586 || gr_register_operand (op, mode));
589 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
592 gr_reg_or_14bit_operand (op, mode)
594 enum machine_mode mode;
596 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
597 || GET_CODE (op) == CONSTANT_P_RTX
598 || gr_register_operand (op, mode));
601 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
604 gr_reg_or_22bit_operand (op, mode)
606 enum machine_mode mode;
608 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
609 || GET_CODE (op) == CONSTANT_P_RTX
610 || gr_register_operand (op, mode));
613 /* Return 1 if OP is a 6 bit immediate operand. */
616 shift_count_operand (op, mode)
618 enum machine_mode mode ATTRIBUTE_UNUSED;
620 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
621 || GET_CODE (op) == CONSTANT_P_RTX);
624 /* Return 1 if OP is a 5 bit immediate operand. */
627 shift_32bit_count_operand (op, mode)
629 enum machine_mode mode ATTRIBUTE_UNUSED;
631 return ((GET_CODE (op) == CONST_INT
632 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
633 || GET_CODE (op) == CONSTANT_P_RTX);
636 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
/* Valid scale factors for the shladd (shift-left-and-add) insn.  */
639 shladd_operand (op, mode)
641 enum machine_mode mode ATTRIBUTE_UNUSED;
643 return (GET_CODE (op) == CONST_INT
644 && (INTVAL (op) == 2 || INTVAL (op) == 4
645 || INTVAL (op) == 8 || INTVAL (op) == 16));
648 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
/* The only increments the fetchadd instruction encodes.  */
651 fetchadd_operand (op, mode)
653 enum machine_mode mode ATTRIBUTE_UNUSED;
655 return (GET_CODE (op) == CONST_INT
656 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
657 INTVAL (op) == -4 || INTVAL (op) == -1 ||
658 INTVAL (op) == 1 || INTVAL (op) == 4 ||
659 INTVAL (op) == 8 || INTVAL (op) == 16));
662 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
/* CONST_DOUBLE_OK_FOR_G matches the hardware's f0 (0.0) / f1 (1.0).  */
665 fr_reg_or_fp01_operand (op, mode)
667 enum machine_mode mode;
669 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
670 || fr_register_operand (op, mode));
673 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
674 POST_MODIFY with a REG as displacement. */
677 destination_operand (op, mode)
679 enum machine_mode mode;
681 if (! nonimmediate_operand (op, mode))
/* Reject (mem (post_modify ... (plus ... (reg)))): stores cannot use a
   register increment.  */
683 if (GET_CODE (op) == MEM
684 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
685 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
690 /* Like memory_operand, but don't allow post-increments. */
/* RTX class 'a' is the autoincrement address class.  */
693 not_postinc_memory_operand (op, mode)
695 enum machine_mode mode;
697 return (memory_operand (op, mode)
698 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
701 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
702 signed immediate operand. */
705 normal_comparison_operator (op, mode)
707 enum machine_mode mode;
709 enum rtx_code code = GET_CODE (op);
710 return ((mode == VOIDmode || GET_MODE (op) == mode)
711 && (code == EQ || code == NE
712 || code == GT || code == LE || code == GTU || code == LEU));
715 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
716 signed immediate operand. */
/* The complement set of normal_comparison_operator: LT/GE/LTU/GEU need the
   "adjusted" (value-1) immediate encoding.  */
719 adjusted_comparison_operator (op, mode)
721 enum machine_mode mode;
723 enum rtx_code code = GET_CODE (op);
724 return ((mode == VOIDmode || GET_MODE (op) == mode)
725 && (code == LT || code == GE || code == LTU || code == GEU));
728 /* Return 1 if this is a signed inequality operator. */
731 signed_inequality_operator (op, mode)
733 enum machine_mode mode;
735 enum rtx_code code = GET_CODE (op);
736 return ((mode == VOIDmode || GET_MODE (op) == mode)
737 && (code == GE || code == GT
738 || code == LE || code == LT));
741 /* Return 1 if this operator is valid for predication. */
/* Only equality tests against a predicate register can guard an insn.  */
744 predicate_operator (op, mode)
746 enum machine_mode mode;
748 enum rtx_code code = GET_CODE (op);
749 return ((GET_MODE (op) == mode || mode == VOIDmode)
750 && (code == EQ || code == NE));
753 /* Return 1 if this operator can be used in a conditional operation. */
756 condop_operator (op, mode)
758 enum machine_mode mode;
760 enum rtx_code code = GET_CODE (op);
761 return ((GET_MODE (op) == mode || mode == VOIDmode)
762 && (code == PLUS || code == MINUS || code == AND
763 || code == IOR || code == XOR));
/* Predicates matching specific application registers (ar.lc, ar.ccv,
   ar.pfs) by hard register number, and TFmode operand predicates that
   additionally reject (mem (addressof)) forms.  */
766 /* Return 1 if this is the ar.lc register. */
769 ar_lc_reg_operand (op, mode)
771 enum machine_mode mode;
773 return (GET_MODE (op) == DImode
774 && (mode == DImode || mode == VOIDmode)
775 && GET_CODE (op) == REG
776 && REGNO (op) == AR_LC_REGNUM);
779 /* Return 1 if this is the ar.ccv register. */
782 ar_ccv_reg_operand (op, mode)
784 enum machine_mode mode;
786 return ((GET_MODE (op) == mode || mode == VOIDmode)
787 && GET_CODE (op) == REG
788 && REGNO (op) == AR_CCV_REGNUM);
791 /* Return 1 if this is the ar.pfs register. */
794 ar_pfs_reg_operand (op, mode)
796 enum machine_mode mode;
798 return ((GET_MODE (op) == mode || mode == VOIDmode)
799 && GET_CODE (op) == REG
800 && REGNO (op) == AR_PFS_REGNUM);
803 /* Like general_operand, but don't allow (mem (addressof)). */
806 general_tfmode_operand (op, mode)
808 enum machine_mode mode;
810 if (! general_operand (op, mode))
812 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
/* Like destination_operand, but don't allow (mem (addressof)).  */
820 destination_tfmode_operand (op, mode)
822 enum machine_mode mode;
824 if (! destination_operand (op, mode))
826 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
/* Like fr_reg_or_fp01_operand, but strips a SUBREG wrapper first.  */
834 tfreg_or_fp01_operand (op, mode)
836 enum machine_mode mode;
838 if (GET_CODE (op) == SUBREG)
840 return fr_reg_or_fp01_operand (op, mode);
843 /* Return 1 if the operands of a move are ok. */
/* DST/SRC are the two operands of a candidate move insn.  A mem->mem move
   is never OK; a store to memory is OK only from a register or from the
   constants the hardware can store directly (integer 0, fp 0.0/1.0).  */
846 ia64_move_ok (dst, src)
849 /* If we're under init_recog_no_volatile, we'll not be able to use
850 memory_operand. So check the code directly and don't worry about
851 the validity of the underlying address, which should have been
852 checked elsewhere anyway. */
853 if (GET_CODE (dst) != MEM)
855 if (GET_CODE (src) == MEM)
857 if (register_operand (src, VOIDmode))
860 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
861 if (INTEGRAL_MODE_P (GET_MODE (dst)))
862 return src == const0_rtx;
864 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
867 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
868 Return the length of the field, or <= 0 on failure. */
/* ROP is the CONST_INT mask, RSHIFT the CONST_INT shift amount.  After
   shifting out the low zero bits, the mask must be a contiguous run of 1s
   starting at bit 0; exact_log2 (op + 1) is then the run length, and is
   negative when op + 1 is not a power of two (i.e. the mask has holes).  */
871 ia64_depz_field_mask (rop, rshift)
874 unsigned HOST_WIDE_INT op = INTVAL (rop);
875 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
877 /* Get rid of the zero bits we're shifting in. */
880 /* We must now have a solid block of 1's at bit 0. */
881 return exact_log2 (op + 1);
884 /* Expand a symbolic constant load. */
885 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
/* DEST receives the address of SRC; SCRATCH is an optional DImode scratch.
   Chooses between gprel64, fptr, gprel, and symptr load patterns, and
   splits large symbol+offset constants into a 14-bit low part (addl range)
   plus high part.  NOTE(review): several guard lines are missing from this
   sampled listing.  */
888 ia64_expand_load_address (dest, src, scratch)
889 rtx dest, src, scratch;
893 /* The destination could be a MEM during initial rtl generation,
894 which isn't a valid destination for the PIC load address patterns. */
895 if (! register_operand (dest, DImode))
896 temp = gen_reg_rtx (DImode);
901 emit_insn (gen_load_gprel64 (temp, src));
902 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
903 emit_insn (gen_load_fptr (temp, src));
904 else if (sdata_symbolic_operand (src, DImode))
905 emit_insn (gen_load_gprel (temp, src));
906 else if (GET_CODE (src) == CONST
907 && GET_CODE (XEXP (src, 0)) == PLUS
908 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
909 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
911 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
912 rtx sym = XEXP (XEXP (src, 0), 0);
913 HOST_WIDE_INT ofs, hi, lo;
915 /* Split the offset into a sign extended 14-bit low part
916 and a complementary high part. */
917 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
918 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
922 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
924 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
926 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
932 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
/* Attach a REG_EQUAL note so later passes know TEMP holds SRC.  */
934 insn = emit_insn (gen_load_symptr (temp, src, scratch));
935 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
/* Copy into the real (possibly MEM) destination if we used a temp.  */
939 emit_move_insn (dest, temp);
/* Return (creating on first use, cached in cfun->machine) the register in
   which to save GP around calls.  SETJMP_P != 0 forces a hard register
   (r4) because pseudos are not restored by longjmp.  */
943 ia64_gp_save_reg (setjmp_p)
946 rtx save = cfun->machine->ia64_gp_save;
950 /* We can't save GP in a pseudo if we are calling setjmp, because
951 pseudos won't be restored by longjmp. For now, we save it in r4. */
952 /* ??? It would be more efficient to save this directly into a stack
953 slot. Unfortunately, the stack slot address gets cse'd across
954 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
957 /* ??? Get the barf bag, Virginia. We've got to replace this thing
958 in place, since this rtx is used in exception handling receivers.
959 Moreover, we must get this rtx out of regno_reg_rtx or reload
960 will do the wrong thing. */
961 unsigned int old_regno = REGNO (save);
962 if (setjmp_p && old_regno != GR_REG (4))
964 REGNO (save) = GR_REG (4);
965 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
/* First use: pick r4 for setjmp, loc0 after reload, else a fresh pseudo,
   and cache the choice.  */
971 save = gen_rtx_REG (DImode, GR_REG (4));
973 save = gen_rtx_REG (DImode, LOC_REG (0));
975 save = gen_reg_rtx (DImode);
976 cfun->machine->ia64_gp_save = save;
982 /* Split a post-reload TImode reference into two DImode components. */
/* IN is the TImode operand; OUT[0]/OUT[1] receive the low/high DImode
   halves.  SCRATCH is a register used to address the second word of an
   offsettable MEM; returns the insn (if any) needed to set it up.
   NOTE(review): case labels and some statements are absent from this
   sampled listing.  */
985 ia64_split_timode (out, in, scratch)
989 switch (GET_CODE (in))
/* REG: the two halves are consecutive hard registers.  */
992 out[0] = gen_rtx_REG (DImode, REGNO (in));
993 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
/* MEM: dispatch on the address form.  */
998 rtx base = XEXP (in, 0);
1000 switch (GET_CODE (base))
1003 out[0] = adjust_address (in, DImode, 0);
1006 base = XEXP (base, 0);
1007 out[0] = adjust_address (in, DImode, 0);
1010 /* Since we're changing the mode, we need to change to POST_MODIFY
1011 as well to preserve the size of the increment. Either that or
1012 do the update in two steps, but we've already got this scratch
1013 register handy so let's use it. */
1015 base = XEXP (base, 0);
1017 = change_address (in, DImode,
1019 (Pmode, base, plus_constant (base, 16)));
1022 base = XEXP (base, 0);
1024 = change_address (in, DImode,
1026 (Pmode, base, plus_constant (base, -16)));
/* Default MEM case: address the high word at base+8 via SCRATCH.  */
1032 if (scratch == NULL_RTX)
1034 out[1] = change_address (in, DImode, scratch);
1035 return gen_adddi3 (scratch, base, GEN_INT (8));
/* Constant: split with the generic helper.  */
1040 split_double (in, &out[0], &out[1]);
1048 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1049 through memory plus an extra GR scratch register. Except that you can
1050 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1051 SECONDARY_RELOAD_CLASS, but not both.
1053 We got into problems in the first place by allowing a construct like
1054 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1055 This solution attempts to prevent this situation from occurring. When
1056 we see something like the above, we spill the inner register to memory. */
/* IN is the TFmode operand; FORCE != 0 spills even a plain REG.  Returns
   a (mem:TF ...) addressing the spilled value, or IN unchanged.  */
1059 spill_tfmode_operand (in, force)
1063 if (GET_CODE (in) == SUBREG
1064 && GET_MODE (SUBREG_REG (in)) == TImode
1065 && GET_CODE (SUBREG_REG (in)) == REG)
1067 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
1068 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1070 else if (force && GET_CODE (in) == REG)
1072 rtx mem = gen_mem_addressof (in, NULL_TREE);
1073 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1075 else if (GET_CODE (in) == MEM
1076 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1077 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1082 /* Emit comparison instruction if necessary, returning the expression
1083 that holds the compare result in the proper mode. */
/* CODE is the comparison; the operands come from the ia64_compare_op0/1
   globals set at compare-expansion time.  Result is (code:MODE cmp 0)
   where CMP is a BImode predicate value.  */
1086 ia64_expand_compare (code, mode)
1088 enum machine_mode mode;
1090 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1093 /* If we have a BImode input, then we already have a compare result, and
1094 do not need to emit another comparison. */
1095 if (GET_MODE (op0) == BImode)
1097 if ((code == NE || code == EQ) && op1 == const0_rtx)
/* Emit the compare into a fresh BImode predicate register.  */
1104 cmp = gen_reg_rtx (BImode);
1105 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1106 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1110 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1113 /* Emit the appropriate sequence for a call. */
/* RETVAL is the value destination or NULL, ADDR the (mem ...) call target,
   NEXTARG marks the first unused outgoing-arg register (used to compute
   NARG), SIBCALL_P selects the sibcall patterns.  Indirect calls through a
   function descriptor must load the target and new GP from the descriptor
   and save/restore our own GP around the call.  NOTE(review): sampled
   listing -- some guards and else-branches are missing.  */
1116 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1122 rtx insn, b0, pfs, gp_save, narg_rtx;
1125 addr = XEXP (addr, 0);
1126 b0 = gen_rtx_REG (DImode, R_BR (0));
1127 pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
/* Number of outgoing argument registers in use.  */
1131 else if (IN_REGNO_P (REGNO (nextarg)))
1132 narg = REGNO (nextarg) - IN_REG (0);
1134 narg = REGNO (nextarg) - OUT_REG (0);
1135 narg_rtx = GEN_INT (narg);
/* No GOT/GP juggling needed in no-pic or auto-pic mode.  */
1137 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1140 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1142 insn = gen_call_nopic (addr, narg_rtx, b0);
1144 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1145 emit_call_insn (insn);
1152 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1154 /* If this is an indirect call, then we have the address of a descriptor. */
1155 if (! symbolic_operand (addr, VOIDmode))
/* Load the entry point and new GP from the descriptor's two words.  */
1160 emit_move_insn (gp_save, pic_offset_table_rtx);
1162 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1163 emit_move_insn (pic_offset_table_rtx,
1164 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1167 insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
1169 insn = gen_call_pic (dest, narg_rtx, b0);
1171 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1172 emit_call_insn (insn);
/* Restore our GP after the call.  */
1175 emit_move_insn (pic_offset_table_rtx, gp_save);
1177 else if (TARGET_CONST_GP)
1180 insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
1182 insn = gen_call_nopic (addr, narg_rtx, b0);
1184 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1185 emit_call_insn (insn);
/* Direct pic call: callee may clobber GP, so save/restore unless sibcall.  */
1190 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0, pfs));
1193 emit_move_insn (gp_save, pic_offset_table_rtx);
1196 insn = gen_call_pic (addr, narg_rtx, b0);
1198 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1199 emit_call_insn (insn);
1201 emit_move_insn (pic_offset_table_rtx, gp_save);
1206 /* Begin the assembly file. */
/* Emit a ".pred.safe_across_calls" directive to stream F listing the
   ranges of predicate registers (p1..p63) that are call-used, so the
   assembler/unwinder knows they need not be preserved.  RS/RE bound each
   run of consecutive call-used predicates.  */
1209 emit_safe_across_calls (f)
1212 unsigned int rs, re;
/* Scan for the next run of call-used predicate registers.  */
1219 while (rs < 64 && call_used_regs[PR_REG (rs)])
1223 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
/* Emit the directive header once, before the first range.  */
1227 fputs ("\t.pred.safe_across_calls ", f);
/* Single register vs. a pN-pM range.  */
1233 fprintf (f, "p%u", rs);
1235 fprintf (f, "p%u-p%u", rs, re - 1);
1243 /* Structure to be filled in by ia64_compute_frame_size with register
1244 save masks and offsets for the current function. */
1246 struct ia64_frame_info
1248 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1249 the caller's scratch area. */
1250 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1251 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1252 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1253 HARD_REG_SET mask; /* mask of saved registers. */
1254 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1255 registers or long-term scratches. */
1256 int n_spilled; /* number of spilled registers. */
1257 int reg_fp; /* register for fp. */
1258 int reg_save_b0; /* save register for b0. */
1259 int reg_save_pr; /* save register for prs. */
1260 int reg_save_ar_pfs; /* save register for ar.pfs. */
1261 int reg_save_ar_unat; /* save register for ar.unat. */
1262 int reg_save_ar_lc; /* save register for ar.lc. */
1263 int n_input_regs; /* number of input registers used. */
1264 int n_local_regs; /* number of local registers used. */
1265 int n_output_regs; /* number of output registers used. */
1266 int n_rotate_regs; /* number of rotating registers used. */
1268 char need_regstk; /* true if a .regstk directive needed. */
1269 char initialized; /* true if the data is finalized. */
1272 /* Current frame information calculated by ia64_compute_frame_size. */
1273 static struct ia64_frame_info current_frame_info;
1275 /* Helper function for ia64_compute_frame_size: find an appropriate general
1276 register to spill some special register to. SPECIAL_SPILL_MASK contains
1277 bits in GR0 to GR31 that have already been allocated by this routine.
1278 TRY_LOCALS is true if we should attempt to locate a local regnum. */
/* Returns the chosen hard register number, marking it in
   current_frame_info.gr_used_mask, or a "use the stack" result when no
   register is available (the fall-through return is not visible in this
   sampled listing).  */
1281 find_gr_spill (try_locals)
1286 /* If this is a leaf function, first try an otherwise unused
1287 call-clobbered register. */
1288 if (current_function_is_leaf)
1290 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1291 if (! regs_ever_live[regno]
1292 && call_used_regs[regno]
1293 && ! fixed_regs[regno]
1294 && ! global_regs[regno]
1295 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1297 current_frame_info.gr_used_mask |= 1 << regno;
/* Otherwise, try allocating a fresh local stacked register.  */
1304 regno = current_frame_info.n_local_regs;
1305 /* If there is a frame pointer, then we can't use loc79, because
1306 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1307 reg_name switching code in ia64_expand_prologue. */
1308 if (regno < (80 - frame_pointer_needed))
1310 current_frame_info.n_local_regs = regno + 1;
1311 return LOC_REG (0) + regno;
1315 /* Failed to find a general register to spill to. Must use stack. */
1319 /* In order to make for nice schedules, we try to allocate every temporary
1320 to a different register. We must of course stay away from call-saved,
1321 fixed, and global registers. We must also stay away from registers
1322 allocated in current_frame_info.gr_used_mask, since those include regs
1323 used all through the prologue.
1325 Any register allocated here must be used immediately. The idea is to
1326 aid scheduling, not to solve data flow problems. */
/* Rotating cursor for the round-robin scratch allocation below; reset
   to 15 at the start of ia64_expand_prologue.  */
1328 static int last_scratch_gr_reg;
/* Return the next GR usable as a short-lived scratch, cycling through
   GR0..GR31 starting just past the last one handed out.  Does NOT mark
   the register in gr_used_mask -- callers that need the register to
   survive must do that themselves.  (Elided lines include the function's
   return type, braces, the return statement, and the abort() implied by
   the comment at original line 1348.)  */
1331 next_scratch_gr_reg ()
1335 for (i = 0; i < 32; ++i)
/* Wrap-around search over the 32 static GRs.  */
1337 regno = (last_scratch_gr_reg + i + 1) & 31;
1338 if (call_used_regs[regno]
1339 && ! fixed_regs[regno]
1340 && ! global_regs[regno]
1341 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1343 last_scratch_gr_reg = regno;
1348 /* There must be _something_ available. */
1352 /* Helper function for ia64_compute_frame_size, called through
1353 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
/* REG is a hard REG rtx; DATA is the unused callback cookie required by
   the diddle_return_value interface.  (Return type and braces elided.)  */
1356 mark_reg_gr_used_mask (reg, data)
1358 void *data ATTRIBUTE_UNUSED;
1360 unsigned int regno = REGNO (reg);
/* Reserve REG so the scratch allocators above will skip it.  */
1362 current_frame_info.gr_used_mask |= 1 << regno;
1365 /* Returns the number of bytes offset between the frame pointer and the stack
1366 pointer for the current function. SIZE is the number of bytes of space
1367 needed for local variables. */
/* Fills in current_frame_info: register-stack frame shape (input /
   local / output / rotate counts), which registers must be spilled,
   the GRs chosen to hold b0 / ar.pfs / ar.unat / ar.lc / pr / fp, and
   the total stack frame size.  Many lines (braces, else arms, early
   return when already initialized, spill-size accumulation) are elided
   from this extract.  */
1370 ia64_compute_frame_size (size)
1373 HOST_WIDE_INT total_size;
1374 HOST_WIDE_INT spill_size = 0;
1375 HOST_WIDE_INT extra_spill_size = 0;
1376 HOST_WIDE_INT pretend_args_size;
1379 int spilled_gr_p = 0;
1380 int spilled_fr_p = 0;
/* Reuse the cached answer when it is still valid (elided return).  */
1384 if (current_frame_info.initialized)
/* NOTE(review): "¤t_frame_info" below is mojibake for
   "&current_frame_info" (HTML entity damage in extraction) -- the call
   zeroes the whole structure.  Fix the encoding when editing code.  */
1387 memset (¤t_frame_info, 0, sizeof current_frame_info);
1388 CLEAR_HARD_REG_SET (mask);
1390 /* Don't allocate scratches to the return register. */
1391 diddle_return_value (mark_reg_gr_used_mask, NULL);
1393 /* Don't allocate scratches to the EH scratch registers. */
1394 if (cfun->machine->ia64_eh_epilogue_sp)
1395 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1396 if (cfun->machine->ia64_eh_epilogue_bsp)
1397 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1399 /* Find the size of the register stack frame. We have only 80 local
1400 registers, because we reserve 8 for the inputs and 8 for the
1403 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1404 since we'll be adjusting that down later. */
1405 regno = LOC_REG (78) + ! frame_pointer_needed;
/* Scan downward for the highest live local register.  */
1406 for (; regno >= LOC_REG (0); regno--)
1407 if (regs_ever_live[regno])
1409 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1411 /* For functions marked with the syscall_linkage attribute, we must mark
1412 all eight input registers as in use, so that locals aren't visible to
1415 if (cfun->machine->n_varargs > 0
1416 || lookup_attribute ("syscall_linkage",
1417 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1418 current_frame_info.n_input_regs = 8;
/* Otherwise count only the inputs actually used (elided else).  */
1421 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1422 if (regs_ever_live[regno])
1424 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1427 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1428 if (regs_ever_live[regno])
1430 i = regno - OUT_REG (0) + 1;
1432 /* When -p profiling, we need one output register for the mcount argument.
1433 Likewise for -a profiling for the bb_init_func argument. For -ax
1434 profiling, we need two output registers for the two bb_init_trace_func
1436 if (profile_flag || profile_block_flag == 1)
1438 else if (profile_block_flag == 2)
1440 current_frame_info.n_output_regs = i;
1442 /* ??? No rotating register support yet. */
1443 current_frame_info.n_rotate_regs = 0;
1445 /* Discover which registers need spilling, and how much room that
1446 will take. Begin with floating point and general registers,
1447 which will always wind up on the stack. */
1449 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1450 if (regs_ever_live[regno] && ! call_used_regs[regno])
1452 SET_HARD_REG_BIT (mask, regno);
1458 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1459 if (regs_ever_live[regno] && ! call_used_regs[regno])
1461 SET_HARD_REG_BIT (mask, regno);
1467 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1468 if (regs_ever_live[regno] && ! call_used_regs[regno])
1470 SET_HARD_REG_BIT (mask, regno);
1475 /* Now come all special registers that might get saved in other
1476 general registers. */
1478 if (frame_pointer_needed)
1480 current_frame_info.reg_fp = find_gr_spill (1);
1481 /* If we did not get a register, then we take LOC79. This is guaranteed
1482 to be free, even if regs_ever_live is already set, because this is
1483 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1484 as we don't count loc79 above. */
1485 if (current_frame_info.reg_fp == 0)
1487 current_frame_info.reg_fp = LOC_REG (79);
1488 current_frame_info.n_local_regs++;
1492 if (! current_function_is_leaf)
1494 /* Emit a save of BR0 if we call other functions. Do this even
1495 if this function doesn't return, as EH depends on this to be
1496 able to unwind the stack. */
1497 SET_HARD_REG_BIT (mask, BR_REG (0));
1499 current_frame_info.reg_save_b0 = find_gr_spill (1);
/* find_gr_spill returning 0 means no GR was free; the save goes to
   memory instead (spill-size accounting elided here).  */
1500 if (current_frame_info.reg_save_b0 == 0)
1506 /* Similarly for ar.pfs. */
1507 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1508 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1509 if (current_frame_info.reg_save_ar_pfs == 0)
1511 extra_spill_size += 8;
/* Leaf functions still save b0 if it is live and call-saved.  */
1517 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1519 SET_HARD_REG_BIT (mask, BR_REG (0));
1525 /* Unwind descriptor hackery: things are most efficient if we allocate
1526 consecutive GR save registers for RP, PFS, FP in that order. However,
1527 it is absolutely critical that FP get the only hard register that's
1528 guaranteed to be free, so we allocated it first. If all three did
1529 happen to be allocated hard regs, and are consecutive, rearrange them
1530 into the preferred order now. */
1531 if (current_frame_info.reg_fp != 0
1532 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1533 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
/* Rotate the three assignments so the order becomes RP, PFS, FP.  */
1535 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1536 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1537 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1540 /* See if we need to store the predicate register block. */
1541 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1542 if (regs_ever_live[regno] && ! call_used_regs[regno])
/* Any live call-saved predicate forces saving the whole PR block,
   represented by the single PR_REG (0) bit in MASK.  */
1544 if (regno <= PR_REG (63))
1546 SET_HARD_REG_BIT (mask, PR_REG (0));
1547 current_frame_info.reg_save_pr = find_gr_spill (1);
1548 if (current_frame_info.reg_save_pr == 0)
1550 extra_spill_size += 8;
1554 /* ??? Mark them all as used so that register renaming and such
1555 are free to use them. */
1556 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1557 regs_ever_live[regno] = 1;
1560 /* If we're forced to use st8.spill, we're forced to save and restore
1562 if (spilled_gr_p || cfun->machine->n_varargs)
1564 regs_ever_live[AR_UNAT_REGNUM] = 1;
1565 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
/* Only try a local register when nothing has spilled yet.  */
1566 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1567 if (current_frame_info.reg_save_ar_unat == 0)
1569 extra_spill_size += 8;
1574 if (regs_ever_live[AR_LC_REGNUM])
1576 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1577 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1578 if (current_frame_info.reg_save_ar_lc == 0)
1580 extra_spill_size += 8;
1585 /* If we have an odd number of words of pretend arguments written to
1586 the stack, then the FR save area will be unaligned. We round the
1587 size of this area up to keep things 16 byte aligned. */
1589 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1591 pretend_args_size = current_function_pretend_args_size;
1593 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1594 + current_function_outgoing_args_size);
1595 total_size = IA64_STACK_ALIGN (total_size);
1597 /* We always use the 16-byte scratch area provided by the caller, but
1598 if we are a leaf function, there's no one to which we need to provide
1600 if (current_function_is_leaf)
1601 total_size = MAX (0, total_size - 16);
/* Publish the computed layout.  */
1603 current_frame_info.total_size = total_size;
1604 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1605 current_frame_info.spill_size = spill_size;
1606 current_frame_info.extra_spill_size = extra_spill_size;
1607 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1608 current_frame_info.n_spilled = n_spilled;
1609 current_frame_info.initialized = reload_completed;
1612 /* Compute the initial difference between the specified pair of registers. */
/* FROM/TO are eliminable register numbers; returns the byte offset to
   add when replacing FROM with TO.  The switch's closing cases, the
   default/abort, and the final return are elided from this extract.  */
1615 ia64_initial_elimination_offset (from, to)
1618 HOST_WIDE_INT offset;
/* Make sure current_frame_info reflects the final frame layout.  */
1620 ia64_compute_frame_size (get_frame_size ());
1623 case FRAME_POINTER_REGNUM:
1624 if (to == HARD_FRAME_POINTER_REGNUM)
1626 if (current_function_is_leaf)
1627 offset = -current_frame_info.total_size;
/* Non-leaf: the 16-byte scratch area and outgoing args sit below
   the locals (elided else branch).  */
1629 offset = -(current_frame_info.total_size
1630 - current_function_outgoing_args_size - 16);
1632 else if (to == STACK_POINTER_REGNUM)
1634 if (current_function_is_leaf)
1637 offset = 16 + current_function_outgoing_args_size;
1643 case ARG_POINTER_REGNUM:
1644 /* Arguments start above the 16 byte save area, unless stdarg
1645 in which case we store through the 16 byte save area. */
1646 if (to == HARD_FRAME_POINTER_REGNUM)
1647 offset = 16 - current_function_pretend_args_size;
1648 else if (to == STACK_POINTER_REGNUM)
1649 offset = (current_frame_info.total_size
1650 + 16 - current_function_pretend_args_size)
1655 case RETURN_ADDRESS_POINTER_REGNUM:
1666 /* If there are more than a trivial number of register spills, we use
1667 two interleaved iterators so that we can get two memory references
1670 In order to simplify things in the prologue and epilogue expanders,
1671 we use helper functions to fix up the memory references after the
1672 fact with the appropriate offsets to a POST_MODIFY memory mode.
1673 The following data structure tracks the state of the two iterators
1674 while insns are being emitted. */
1676 struct spill_fill_data
1678 rtx init_after; /* point at which to emit initializations */
1679 rtx init_reg[2]; /* initial base register */
1680 rtx iter_reg[2]; /* the iterator registers */
1681 rtx *prev_addr[2]; /* address of last memory use */
1682 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
1683 HOST_WIDE_INT prev_off[2]; /* last offset */
1684 int n_iter; /* number of iterators in use */
1685 int next_iter; /* next iterator to use */
1686 unsigned int save_gr_used_mask;
/* Single instance shared by setup_spill_pointers / spill_restore_mem /
   finish_spill_pointers during one prologue or epilogue expansion.  */
1689 static struct spill_fill_data spill_fill_data;
/* Initialize spill_fill_data for N_SPILLS saves/restores based at
   INIT_REG, whose value corresponds to CFA offset CFA_OFF.  Two
   iterators are used once more than two spills are needed, so loads
   and stores can alternate between them.  (Return type, remaining
   parameter declarations, and braces elided from this extract.)  */
1692 setup_spill_pointers (n_spills, init_reg, cfa_off)
1695 HOST_WIDE_INT cfa_off;
1699 spill_fill_data.init_after = get_last_insn ();
1700 spill_fill_data.init_reg[0] = init_reg;
1701 spill_fill_data.init_reg[1] = init_reg;
1702 spill_fill_data.prev_addr[0] = NULL;
1703 spill_fill_data.prev_addr[1] = NULL;
1704 spill_fill_data.prev_insn[0] = NULL;
1705 spill_fill_data.prev_insn[1] = NULL;
1706 spill_fill_data.prev_off[0] = cfa_off;
1707 spill_fill_data.prev_off[1] = cfa_off;
1708 spill_fill_data.next_iter = 0;
/* Remember the mask so finish_spill_pointers can release the scratch
   iterator registers claimed below.  */
1709 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1711 spill_fill_data.n_iter = 1 + (n_spills > 2);
1712 for (i = 0; i < spill_fill_data.n_iter; ++i)
1714 int regno = next_scratch_gr_reg ();
1715 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1716 current_frame_info.gr_used_mask |= 1 << regno;
/* Release the iterator scratch registers by restoring gr_used_mask to
   its value from before setup_spill_pointers.  */
1721 finish_spill_pointers ()
1723 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
/* Return a MEM in REG's mode addressing the save slot at CFA offset
   CFA_OFF, using the next spill iterator.  The previous reference
   through the same iterator is retro-fitted with a POST_MODIFY (or an
   explicit add is emitted) so the iterator advances by the difference
   between the two offsets.  Alternates next_iter between the active
   iterators.  Several lines (braces, else arms, emit of the scratch
   add, the return statement) are elided from this extract.  */
1727 spill_restore_mem (reg, cfa_off)
1729 HOST_WIDE_INT cfa_off;
1731 int iter = spill_fill_data.next_iter;
/* Distance from the iterator's last known position to the new slot.  */
1732 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1733 rtx disp_rtx = GEN_INT (disp);
1736 if (spill_fill_data.prev_addr[iter])
/* Small displacement: convert the previous MEM to POST_MODIFY so the
   pointer update rides along with the prior access for free.  */
1738 if (CONST_OK_FOR_N (disp))
1740 *spill_fill_data.prev_addr[iter]
1741 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1742 gen_rtx_PLUS (DImode,
1743 spill_fill_data.iter_reg[iter],
/* Record the auto-increment so flow/unwind see the side effect.  */
1745 REG_NOTES (spill_fill_data.prev_insn[iter])
1746 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1747 REG_NOTES (spill_fill_data.prev_insn[iter]));
1751 /* ??? Could use register post_modify for loads. */
1752 if (! CONST_OK_FOR_I (disp))
1754 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1755 emit_move_insn (tmp, disp_rtx);
1758 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1759 spill_fill_data.iter_reg[iter], disp_rtx));
1762 /* Micro-optimization: if we've created a frame pointer, it's at
1763 CFA 0, which may allow the real iterator to be initialized lower,
1764 slightly increasing parallelism. Also, if there are few saves
1765 it may eliminate the iterator entirely. */
1767 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1768 && frame_pointer_needed)
1770 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1771 set_mem_alias_set (mem, get_varargs_alias_set ());
/* First use of this iterator: materialize its initial value from
   init_reg (plus displacement when nonzero -- elided branches).  */
1779 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1780 spill_fill_data.init_reg[iter]);
1785 if (! CONST_OK_FOR_I (disp))
1787 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1788 emit_move_insn (tmp, disp_rtx);
1792 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1793 spill_fill_data.init_reg[iter],
1796 seq = gen_sequence ();
1800 /* Careful for being the first insn in a sequence. */
1801 if (spill_fill_data.init_after)
1802 spill_fill_data.init_after
1803 = emit_insn_after (seq, spill_fill_data.init_after);
1806 rtx first = get_insns ();
1808 spill_fill_data.init_after
1809 = emit_insn_before (seq, first);
1811 spill_fill_data.init_after = emit_insn (seq);
1815 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1817 /* ??? Not all of the spills are for varargs, but some of them are.
1818 The rest of the spills belong in an alias set of their own. But
1819 it doesn't actually hurt to include them here. */
1820 set_mem_alias_set (mem, get_varargs_alias_set ());
/* Remember where this address lives so the next call can rewrite it
   into a POST_MODIFY, and where the iterator now points.  */
1822 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1823 spill_fill_data.prev_off[iter] = cfa_off;
/* Round-robin to the other iterator.  */
1825 if (++iter >= spill_fill_data.n_iter)
1827 spill_fill_data.next_iter = iter;
/* Emit a spill of REG to its save slot at CFA offset CFA_OFF using
   MOVE_FN (one of gen_movdi_x / gen_gr_spill / gen_fr_spill_x).  When
   FRAME_REG is non-null the insn is marked frame-related and given an
   explicit REG_FRAME_RELATED_EXPR note, because the unwinder cannot
   decode addresses built from the interleaved post-modify iterators.
   (Return type, some declarations, braces, and the conditional around
   the note emission are elided from this extract.)  */
1833 do_spill (move_fn, reg, cfa_off, frame_reg)
1834 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1836 HOST_WIDE_INT cfa_off;
1838 int iter = spill_fill_data.next_iter;
1841 mem = spill_restore_mem (reg, cfa_off);
1842 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
/* Kept so spill_restore_mem can retro-fit a POST_MODIFY and REG_INC
   note onto this insn later.  */
1843 spill_fill_data.prev_insn[iter] = insn;
1850 RTX_FRAME_RELATED_P (insn) = 1;
1852 /* Don't even pretend that the unwind code can intuit its way
1853 through a pair of interleaved post_modify iterators. Just
1854 provide the correct answer. */
1856 if (frame_pointer_needed)
/* With a frame pointer the CFA is at hard FP (offset handling elided).  */
1858 base = hard_frame_pointer_rtx;
1863 base = stack_pointer_rtx;
1864 off = current_frame_info.total_size - cfa_off;
/* Attach the canonical "mem(base+off) = reg" description for unwind.  */
1868 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1869 gen_rtx_SET (VOIDmode,
1870 gen_rtx_MEM (GET_MODE (reg),
1871 plus_constant (base, off)),
/* Counterpart of do_spill: emit a restore of REG from its save slot at
   CFA offset CFA_OFF using MOVE_FN.  No frame-related notes are needed
   on restores.  (Return type, some declarations, braces elided.)  */
1878 do_restore (move_fn, reg, cfa_off)
1879 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1881 HOST_WIDE_INT cfa_off;
1883 int iter = spill_fill_data.next_iter;
1886 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1887 GEN_INT (cfa_off)));
/* Kept so spill_restore_mem can retro-fit a POST_MODIFY later.  */
1888 spill_fill_data.prev_insn[iter] = insn;
1891 /* Wrapper functions that discard the CONST_INT spill offset. These
1892 exist so that we can give gr_spill/gr_fill the offset they need and
1893 use a consistent function interface. */
/* DImode move that ignores the OFFSET argument; matches the MOVE_FN
   signature expected by do_spill/do_restore.  */
1896 gen_movdi_x (dest, src, offset)
1898 rtx offset ATTRIBUTE_UNUSED;
1900 return gen_movdi (dest, src);
/* FR spill that ignores the OFFSET argument; matches the MOVE_FN
   signature expected by do_spill.  */
1904 gen_fr_spill_x (dest, src, offset)
1906 rtx offset ATTRIBUTE_UNUSED;
1908 return gen_fr_spill (dest, src);
/* FR restore that ignores the OFFSET argument; matches the MOVE_FN
   signature expected by do_restore.  */
1912 gen_fr_restore_x (dest, src, offset)
1914 rtx offset ATTRIBUTE_UNUSED;
1916 return gen_fr_restore (dest, src);
1919 /* Called after register allocation to add any instructions needed for the
1920 prologue. Using a prologue insn is favored compared to putting all of the
1921 instructions in output_function_prologue(), since it allows the scheduler
1922 to intermix instructions with the saves of the caller saved registers. In
1923 some cases, it might be necessary to emit a barrier instruction as the last
1924 insn to prevent such scheduling.
1926 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1927 so that the debug info generation code can handle them properly.
1929 The register save area is laid out like so:
1931 [ varargs spill area ]
1932 [ fr register spill area ]
1933 [ br register spill area ]
1934 [ ar register spill area ]
1935 [ pr register spill area ]
1936 [ gr register spill area ] */
1938 /* ??? Get inefficient code when the frame size is larger than can fit in an
1939 adds instruction. */
/* NOTE(review): this extract is elided -- return type, many braces,
   else arms, and some statements are missing between numbered lines.  */
1942 ia64_expand_prologue ()
1944 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1945 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
/* Finalize the frame layout before emitting anything.  */
1948 ia64_compute_frame_size (get_frame_size ());
1949 last_scratch_gr_reg = 15;
1951 /* If there is no epilogue, then we don't need some prologue insns.
1952 We need to avoid emitting the dead prologue insns, because flow
1953 will complain about them. */
/* An epilogue exists iff some non-fake edge falls through to EXIT.  */
1958 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1959 if ((e->flags & EDGE_FAKE) == 0
1960 && (e->flags & EDGE_FALLTHRU) != 0)
1962 epilogue_p = (e != NULL);
1967 /* Set the local, input, and output register names. We need to do this
1968 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1969 half. If we use in/loc/out register names, then we get assembler errors
1970 in crtn.S because there is no alloc insn or regstk directive in there. */
1971 if (! TARGET_REG_NAMES)
1973 int inputs = current_frame_info.n_input_regs;
1974 int locals = current_frame_info.n_local_regs;
1975 int outputs = current_frame_info.n_output_regs;
/* Map in0.., loc0.., out0.. onto the flat r32..r127 names.  */
1977 for (i = 0; i < inputs; i++)
1978 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1979 for (i = 0; i < locals; i++)
1980 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1981 for (i = 0; i < outputs; i++)
1982 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1985 /* Set the frame pointer register name. The regnum is logically loc79,
1986 but of course we'll not have allocated that many locals. Rather than
1987 worrying about renumbering the existing rtxs, we adjust the name. */
1988 /* ??? This code means that we can never use one local register when
1989 there is a frame pointer. loc79 gets wasted in this case, as it is
1990 renamed to a register that will never be used. See also the try_locals
1991 code in find_gr_spill. */
1992 if (current_frame_info.reg_fp)
/* Swap the printed names of loc79 and the GR chosen for the FP.  */
1994 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1995 reg_names[HARD_FRAME_POINTER_REGNUM]
1996 = reg_names[current_frame_info.reg_fp];
1997 reg_names[current_frame_info.reg_fp] = tmp;
2000 /* Fix up the return address placeholder. */
2001 /* ??? We can fail if __builtin_return_address is used, and we didn't
2002 allocate a register in which to save b0. I can't think of a way to
2003 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2004 then be sure that I got the right one. Further, reload doesn't seem
2005 to care if an eliminable register isn't used, and "eliminates" it
2007 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2008 && current_frame_info.reg_save_b0 != 0)
2009 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2011 /* We don't need an alloc instruction if we've used no outputs or locals. */
2012 if (current_frame_info.n_local_regs == 0
2013 && current_frame_info.n_output_regs == 0
2014 && current_frame_info.n_input_regs <= current_function_args_info.words)
2016 /* If there is no alloc, but there are input registers used, then we
2017 need a .regstk directive. */
2018 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2019 ar_pfs_save_reg = NULL_RTX;
/* Otherwise emit the alloc (elided else); it also copies ar.pfs into
   a GR: either the dedicated save register or a scratch.  */
2023 current_frame_info.need_regstk = 0;
2025 if (current_frame_info.reg_save_ar_pfs)
2026 regno = current_frame_info.reg_save_ar_pfs;
2028 regno = next_scratch_gr_reg ();
2029 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2031 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2032 GEN_INT (current_frame_info.n_input_regs),
2033 GEN_INT (current_frame_info.n_local_regs),
2034 GEN_INT (current_frame_info.n_output_regs),
2035 GEN_INT (current_frame_info.n_rotate_regs)));
2036 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2039 /* Set up frame pointer, stack pointer, and spill iterators. */
2041 n_varargs = cfun->machine->n_varargs;
2042 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2043 stack_pointer_rtx, 0);
2045 if (frame_pointer_needed)
2047 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2048 RTX_FRAME_RELATED_P (insn) = 1;
2051 if (current_frame_info.total_size != 0)
2053 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
/* Small frames adjust SP with an immediate; larger ones go through a
   scratch register.  */
2056 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2057 offset = frame_size_rtx;
2060 regno = next_scratch_gr_reg ();
2061 offset = gen_rtx_REG (DImode, regno);
2062 emit_move_insn (offset, frame_size_rtx);
2065 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2066 stack_pointer_rtx, offset));
2068 if (! frame_pointer_needed)
2070 RTX_FRAME_RELATED_P (insn) = 1;
/* When the adjustment went via a register, give the unwinder the
   constant form of the SP update explicitly.  */
2071 if (GET_CODE (offset) != CONST_INT)
2074 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2075 gen_rtx_SET (VOIDmode,
2077 gen_rtx_PLUS (DImode,
2084 /* ??? At this point we must generate a magic insn that appears to
2085 modify the stack pointer, the frame pointer, and all spill
2086 iterators. This would allow the most scheduling freedom. For
2087 now, just hard stop. */
2088 emit_insn (gen_blockage ());
2091 /* Must copy out ar.unat before doing any integer spills. */
2092 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2094 if (current_frame_info.reg_save_ar_unat)
2096 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat)
/* No dedicated save register: use a scratch, and pin it in
   gr_used_mask since it must live across the GR spills below.  */
2099 alt_regno = next_scratch_gr_reg ();
2100 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2101 current_frame_info.gr_used_mask |= 1 << alt_regno;
2104 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2105 insn = emit_move_insn (ar_unat_save_reg, reg);
2106 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2108 /* Even if we're not going to generate an epilogue, we still
2109 need to save the register so that EH works. */
2110 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2111 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
2114 ar_unat_save_reg = NULL_RTX;
2116 /* Spill all varargs registers. Do this before spilling any GR registers,
2117 since we want the UNAT bits for the GR registers to override the UNAT
2118 bits from varargs, which we don't care about. */
2121 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2123 reg = gen_rtx_REG (DImode, regno);
2124 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2127 /* Locate the bottom of the register save area. */
2128 cfa_off = (current_frame_info.spill_cfa_off
2129 + current_frame_info.spill_size
2130 + current_frame_info.extra_spill_size);
2132 /* Save the predicate register block either in a register or in memory. */
2133 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2135 reg = gen_rtx_REG (DImode, PR_REG (0));
2136 if (current_frame_info.reg_save_pr != 0)
2138 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2139 insn = emit_move_insn (alt_reg, reg);
2141 /* ??? Denote pr spill/fill by a DImode move that modifies all
2142 64 hard registers. */
2143 RTX_FRAME_RELATED_P (insn) = 1;
2145 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2146 gen_rtx_SET (VOIDmode, alt_reg, reg),
2149 /* Even if we're not going to generate an epilogue, we still
2150 need to save the register so that EH works. */
2152 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
/* No save register: bounce PR through a scratch into memory.  */
2156 alt_regno = next_scratch_gr_reg ();
2157 alt_reg = gen_rtx_REG (DImode, alt_regno);
2158 insn = emit_move_insn (alt_reg, reg);
2159 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2164 /* Handle AR regs in numerical order. All of them get special handling. */
2165 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2166 && current_frame_info.reg_save_ar_unat == 0)
2168 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2169 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2173 /* The alloc insn already copied ar.pfs into a general register. The
2174 only thing we have to do now is copy that register to a stack slot
2175 if we'd not allocated a local register for the job. */
2176 if (current_frame_info.reg_save_ar_pfs == 0
2177 && ! current_function_is_leaf)
2179 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2180 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2184 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2186 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2187 if (current_frame_info.reg_save_ar_lc != 0)
2189 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2190 insn = emit_move_insn (alt_reg, reg);
2191 RTX_FRAME_RELATED_P (insn) = 1;
2193 /* Even if we're not going to generate an epilogue, we still
2194 need to save the register so that EH works. */
2196 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
/* No save register: bounce ar.lc through a scratch into memory.  */
2200 alt_regno = next_scratch_gr_reg ();
2201 alt_reg = gen_rtx_REG (DImode, alt_regno);
2202 emit_move_insn (alt_reg, reg);
2203 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2208 /* We should now be at the base of the gr/br/fr spill area. */
/* Sanity check on cfa_off bookkeeping (the abort call is elided).  */
2209 if (cfa_off != (current_frame_info.spill_cfa_off
2210 + current_frame_info.spill_size))
2213 /* Spill all general registers. */
2214 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2215 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2217 reg = gen_rtx_REG (DImode, regno);
2218 do_spill (gen_gr_spill, reg, cfa_off, reg);
2222 /* Handle BR0 specially -- it may be getting stored permanently in
2223 some GR register. */
2224 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2226 reg = gen_rtx_REG (DImode, BR_REG (0));
2227 if (current_frame_info.reg_save_b0 != 0)
2229 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2230 insn = emit_move_insn (alt_reg, reg);
2231 RTX_FRAME_RELATED_P (insn) = 1;
2233 /* Even if we're not going to generate an epilogue, we still
2234 need to save the register so that EH works. */
2236 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
/* No save register: bounce b0 through a scratch into memory.  */
2240 alt_regno = next_scratch_gr_reg ();
2241 alt_reg = gen_rtx_REG (DImode, alt_regno);
2242 emit_move_insn (alt_reg, reg);
2243 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2248 /* Spill the rest of the BR registers. */
2249 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2250 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2252 alt_regno = next_scratch_gr_reg ();
2253 alt_reg = gen_rtx_REG (DImode, alt_regno);
2254 reg = gen_rtx_REG (DImode, regno);
2255 emit_move_insn (alt_reg, reg);
2256 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2260 /* Align the frame and spill all FR registers. */
2261 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2262 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
/* FR saves are 16 bytes (TFmode); alignment handling elided.  */
2266 reg = gen_rtx_REG (TFmode, regno);
2267 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
/* Final bookkeeping check: all spill space must be consumed.  */
2271 if (cfa_off != current_frame_info.spill_cfa_off)
2274 finish_spill_pointers ();
2277 /* Called after register allocation to add any instructions needed for the
2278 epilogue. Using an epilogue insn is favored compared to putting all of the
2279 instructions in output_function_prologue(), since it allows the scheduler
2280 to intermix instructions with the saves of the caller saved registers. In
2281 some cases, it might be necessary to emit a barrier instruction as the last
2282 insn to prevent such scheduling. */
2285 ia64_expand_epilogue (sibcall_p)
2288 rtx insn, reg, alt_reg, ar_unat_save_reg;
2289 int regno, alt_regno, cfa_off;
2291 ia64_compute_frame_size (get_frame_size ());
2293 /* If there is a frame pointer, then we use it instead of the stack
2294 pointer, so that the stack pointer does not need to be valid when
2295 the epilogue starts. See EXIT_IGNORE_STACK. */
2296 if (frame_pointer_needed)
2297 setup_spill_pointers (current_frame_info.n_spilled,
2298 hard_frame_pointer_rtx, 0);
2300 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2301 current_frame_info.total_size);
2303 if (current_frame_info.total_size != 0)
2305 /* ??? At this point we must generate a magic insn that appears to
2306 modify the spill iterators and the frame pointer. This would
2307 allow the most scheduling freedom. For now, just hard stop. */
2308 emit_insn (gen_blockage ());
2311 /* Locate the bottom of the register save area. */
2312 cfa_off = (current_frame_info.spill_cfa_off
2313 + current_frame_info.spill_size
2314 + current_frame_info.extra_spill_size);
2316 /* Restore the predicate registers. */
2317 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2319 if (current_frame_info.reg_save_pr != 0)
2320 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2323 alt_regno = next_scratch_gr_reg ();
2324 alt_reg = gen_rtx_REG (DImode, alt_regno);
2325 do_restore (gen_movdi_x, alt_reg, cfa_off);
2328 reg = gen_rtx_REG (DImode, PR_REG (0));
2329 emit_move_insn (reg, alt_reg);
2332 /* Restore the application registers. */
2334 /* Load the saved unat from the stack, but do not restore it until
2335 after the GRs have been restored. */
2336 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2338 if (current_frame_info.reg_save_ar_unat != 0)
2340 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2343 alt_regno = next_scratch_gr_reg ();
2344 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2345 current_frame_info.gr_used_mask |= 1 << alt_regno;
2346 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2351 ar_unat_save_reg = NULL_RTX;
2353 if (current_frame_info.reg_save_ar_pfs != 0)
2355 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2356 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2357 emit_move_insn (reg, alt_reg);
2359 else if (! current_function_is_leaf)
2361 alt_regno = next_scratch_gr_reg ();
2362 alt_reg = gen_rtx_REG (DImode, alt_regno);
2363 do_restore (gen_movdi_x, alt_reg, cfa_off);
2365 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2366 emit_move_insn (reg, alt_reg);
2369 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2371 if (current_frame_info.reg_save_ar_lc != 0)
2372 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2375 alt_regno = next_scratch_gr_reg ();
2376 alt_reg = gen_rtx_REG (DImode, alt_regno);
2377 do_restore (gen_movdi_x, alt_reg, cfa_off);
2380 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2381 emit_move_insn (reg, alt_reg);
2384 /* We should now be at the base of the gr/br/fr spill area. */
2385 if (cfa_off != (current_frame_info.spill_cfa_off
2386 + current_frame_info.spill_size))
2389 /* Restore all general registers. */
2390 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2391 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2393 reg = gen_rtx_REG (DImode, regno);
2394 do_restore (gen_gr_restore, reg, cfa_off);
2398 /* Restore the branch registers. Handle B0 specially, as it may
2399 have gotten stored in some GR register. */
2400 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2402 if (current_frame_info.reg_save_b0 != 0)
2403 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2406 alt_regno = next_scratch_gr_reg ();
2407 alt_reg = gen_rtx_REG (DImode, alt_regno);
2408 do_restore (gen_movdi_x, alt_reg, cfa_off);
2411 reg = gen_rtx_REG (DImode, BR_REG (0));
2412 emit_move_insn (reg, alt_reg);
2415 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2416 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2418 alt_regno = next_scratch_gr_reg ();
2419 alt_reg = gen_rtx_REG (DImode, alt_regno);
2420 do_restore (gen_movdi_x, alt_reg, cfa_off);
2422 reg = gen_rtx_REG (DImode, regno);
2423 emit_move_insn (reg, alt_reg);
2426 /* Restore floating point registers. */
2427 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2428 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2432 reg = gen_rtx_REG (TFmode, regno);
2433 do_restore (gen_fr_restore_x, reg, cfa_off);
2437 /* Restore ar.unat for real. */
2438 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2440 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2441 emit_move_insn (reg, ar_unat_save_reg);
2444 if (cfa_off != current_frame_info.spill_cfa_off)
2447 finish_spill_pointers ();
2449 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2451 /* ??? At this point we must generate a magic insn that appears to
2452 modify the spill iterators, the stack pointer, and the frame
2453 pointer. This would allow the most scheduling freedom. For now,
2455 emit_insn (gen_blockage ());
2458 if (cfun->machine->ia64_eh_epilogue_sp)
2459 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2460 else if (frame_pointer_needed)
2462 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2463 RTX_FRAME_RELATED_P (insn) = 1;
2465 else if (current_frame_info.total_size)
2467 rtx offset, frame_size_rtx;
2469 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2470 if (CONST_OK_FOR_I (current_frame_info.total_size))
2471 offset = frame_size_rtx;
2474 regno = next_scratch_gr_reg ();
2475 offset = gen_rtx_REG (DImode, regno);
2476 emit_move_insn (offset, frame_size_rtx);
2479 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2482 RTX_FRAME_RELATED_P (insn) = 1;
2483 if (GET_CODE (offset) != CONST_INT)
2486 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2487 gen_rtx_SET (VOIDmode,
2489 gen_rtx_PLUS (DImode,
2496 if (cfun->machine->ia64_eh_epilogue_bsp)
2497 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2500 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2503 int fp = GR_REG (2);
2504 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2505 first available call clobbered register. If there was a frame_pointer
2506 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2507 so we have to make sure we're using the string "r2" when emitting
2508 the register name for the assmbler. */
2509 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2510 fp = HARD_FRAME_POINTER_REGNUM;
2512 /* We must emit an alloc to force the input registers to become output
2513 registers. Otherwise, if the callee tries to pass its parameters
2514 through to another call without an intervening alloc, then these
2516 /* ??? We don't need to preserve all input registers. We only need to
2517 preserve those input registers used as arguments to the sibling call.
2518 It is unclear how to compute that number here. */
2519 if (current_frame_info.n_input_regs != 0)
2520 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2521 GEN_INT (0), GEN_INT (0),
2522 GEN_INT (current_frame_info.n_input_regs),
2527 /* Return 1 if br.ret can do all the work required to return from a
/* Purpose: after reload, when no frame pointer is needed, recompute the
   frame layout and report whether the frame is entirely trivial — no stack
   adjustment, no spilled registers, and none of b0, pr, ar.pfs, ar.unat or
   ar.lc saved — so a bare br.ret suffices.
   NOTE(review): listing lines are missing here (return type, braces and the
   trailing `return 0;` for the non-trivial case) — confirm against the
   full file.  */
2531 ia64_direct_return ()
2533 if (reload_completed && ! frame_pointer_needed)
/* Recompute so current_frame_info is valid for the tests below.  */
2535 ia64_compute_frame_size (get_frame_size ());
2537 return (current_frame_info.total_size == 0
2538 && current_frame_info.n_spilled == 0
2539 && current_frame_info.reg_save_b0 == 0
2540 && current_frame_info.reg_save_pr == 0
2541 && current_frame_info.reg_save_ar_pfs == 0
2542 && current_frame_info.reg_save_ar_unat == 0
2543 && current_frame_info.reg_save_ar_lc == 0);
/* Predicate used by the register-renaming pass: may hard register FROM be
   renamed to TO in this function?  Rejects renames that would touch the
   registers the prologue reserved, output registers outside the current
   register frame, predicate renames that change parity, and GR4 when
   setjmp is in use.
   NOTE(review): the bodies of the guard `if`s (their `return 0;`/`return 1;`
   lines) fall on listing lines not shown here — confirm against the full
   file.  */
2549 ia64_hard_regno_rename_ok (from, to)
2553 /* Don't clobber any of the registers we reserved for the prologue. */
2554 if (to == current_frame_info.reg_fp
2555 || to == current_frame_info.reg_save_b0
2556 || to == current_frame_info.reg_save_pr
2557 || to == current_frame_info.reg_save_ar_pfs
2558 || to == current_frame_info.reg_save_ar_unat
2559 || to == current_frame_info.reg_save_ar_lc)
/* Symmetric check: the reserved registers may not be renamed away either.  */
2562 if (from == current_frame_info.reg_fp
2563 || from == current_frame_info.reg_save_b0
2564 || from == current_frame_info.reg_save_pr
2565 || from == current_frame_info.reg_save_ar_pfs
2566 || from == current_frame_info.reg_save_ar_unat
2567 || from == current_frame_info.reg_save_ar_lc)
2570 /* Don't use output registers outside the register frame. */
2571 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2574 /* Retain even/oddness on predicate register pairs. */
2575 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2576 return (from & 1) == (to & 1);
2578 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2579 if (from == GR_REG (4) && current_function_calls_setjmp)
2585 /* Emit the function prologue. */
/* Writes the assembler directives that describe the frame to the unwinder:
   an optional .regstk with the register-frame sizes, then (when unwind
   info is wanted) a .prologue directive whose MASK/GRSAVE operands encode
   which special registers were saved and in which consecutive GRs, and
   finally a .spill if the spill area base was moved from its default.
   NOTE(review): the lines that set `mask` bits and several braces fall in
   listing gaps (e.g. 2609-2610, 2616-2617) — confirm against the full
   file.  */
2588 ia64_output_function_prologue (file, size)
2590 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2592 int mask, grsave, grsave_prev;
2594 if (current_frame_info.need_regstk)
2595 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2596 current_frame_info.n_input_regs,
2597 current_frame_info.n_local_regs,
2598 current_frame_info.n_output_regs,
2599 current_frame_info.n_rotate_regs);
/* Without unwind tables or table-driven EH, no directives are needed.  */
2601 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2604 /* Emit the .prologue directive. */
/* Walk the saves in prologue order; GRSAVE is the first GR used and
   GRSAVE_PREV tracks consecutiveness so one base register suffices.  */
2607 grsave = grsave_prev = 0;
2608 if (current_frame_info.reg_save_b0 != 0)
2611 grsave = grsave_prev = current_frame_info.reg_save_b0;
2613 if (current_frame_info.reg_save_ar_pfs != 0
2614 && (grsave_prev == 0
2615 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2618 if (grsave_prev == 0)
2619 grsave = current_frame_info.reg_save_ar_pfs;
2620 grsave_prev = current_frame_info.reg_save_ar_pfs;
2622 if (current_frame_info.reg_fp != 0
2623 && (grsave_prev == 0
2624 || current_frame_info.reg_fp == grsave_prev + 1))
2627 if (grsave_prev == 0)
2628 grsave = HARD_FRAME_POINTER_REGNUM;
2629 grsave_prev = current_frame_info.reg_fp;
2631 if (current_frame_info.reg_save_pr != 0
2632 && (grsave_prev == 0
2633 || current_frame_info.reg_save_pr == grsave_prev + 1))
2636 if (grsave_prev == 0)
2637 grsave = current_frame_info.reg_save_pr;
2641 fprintf (file, "\t.prologue %d, %d\n", mask,
2642 ia64_dbx_register_number (grsave));
/* Fallback: nothing encodable, emit a bare .prologue.  */
2644 fputs ("\t.prologue\n", file);
2646 /* Emit a .spill directive, if necessary, to relocate the base of
2647 the register spill area. */
2648 if (current_frame_info.spill_cfa_off != -16)
2649 fprintf (file, "\t.spill %ld\n",
2650 (long) (current_frame_info.spill_cfa_off
2651 + current_frame_info.spill_size));
2654 /* Emit the .body directive at the scheduled end of the prologue. */
/* Skipped entirely when no unwind info is being generated (no unwind
   tables and either no exceptions or SJLJ exceptions).  */
2657 ia64_output_function_end_prologue (file)
2660 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2663 fputs ("\t.body\n", file);
2666 /* Emit the function epilogue. */
/* Per-function teardown: undo the global state the prologue expansion
   mutated — restore the return-address-pointer regno, swap the frame
   pointer's register name back (the prologue may have renamed r2 and the
   hard FP), restore the default in/loc/out register names, and mark
   current_frame_info as uninitialized for the next function.  */
2669 ia64_output_function_epilogue (file, size)
2670 FILE *file ATTRIBUTE_UNUSED;
2671 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2675 /* Reset from the function's potential modifications. */
2676 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2678 if (current_frame_info.reg_fp)
/* Undo the name swap done when the frame pointer was moved into the
   register frame.  */
2680 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2681 reg_names[HARD_FRAME_POINTER_REGNUM]
2682 = reg_names[current_frame_info.reg_fp];
2683 reg_names[current_frame_info.reg_fp] = tmp;
2685 if (! TARGET_REG_NAMES)
2687 for (i = 0; i < current_frame_info.n_input_regs; i++)
2688 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2689 for (i = 0; i < current_frame_info.n_local_regs; i++)
2690 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2691 for (i = 0; i < current_frame_info.n_output_regs; i++)
2692 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2695 current_frame_info.initialized = 0;
/* Map hard register REGNO to the number the debugger expects.  Undoes the
   prologue's frame-pointer rename, then renumbers stacked registers
   (in/loc/out) into the contiguous 32.. space debug info uses.
   NOTE(review): the fallback return for non-stacked registers is on a
   listing line not shown here — confirm against the full file.  */
2699 ia64_dbx_register_number (regno)
2702 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2703 from its home at loc79 to something inside the register frame. We
2704 must perform the same renumbering here for the debug info. */
2705 if (current_frame_info.reg_fp)
2707 if (regno == HARD_FRAME_POINTER_REGNUM)
2708 regno = current_frame_info.reg_fp;
2709 else if (regno == current_frame_info.reg_fp)
2710 regno = HARD_FRAME_POINTER_REGNUM;
2713 if (IN_REGNO_P (regno))
2714 return 32 + regno - IN_REG (0);
2715 else if (LOC_REGNO_P (regno))
2716 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2717 else if (OUT_REGNO_P (regno))
2718 return (32 + current_frame_info.n_input_regs
2719 + current_frame_info.n_local_regs + regno - OUT_REG (0));
/* Fill in a trampoline at ADDR so it calls FNADDR with STATIC_CHAIN.
   Layout is four 8-byte words: a fake function descriptor
   { __ia64_trampoline, ADDR+16 } followed by the real target descriptor
   word and the static chain word.  ADDR_REG walks through the words,
   advancing by eight after each store.  */
2725 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2726 rtx addr, fnaddr, static_chain;
2728 rtx addr_reg, eight = GEN_INT (8);
2730 /* Load up our iterator. */
2731 addr_reg = gen_reg_rtx (Pmode);
2732 emit_move_insn (addr_reg, addr);
2734 /* The first two words are the fake descriptor:
2735 __ia64_trampoline, ADDR+16. */
2736 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2737 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2738 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2740 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2741 copy_to_reg (plus_constant (addr, 16)));
2742 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2744 /* The third word is the target descriptor. */
2745 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2746 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2748 /* The fourth word is the static chain. */
2749 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2752 /* Do any needed setup for a variadic function. CUM has not been updated
2753 for the last named argument which has type TYPE and mode MODE.
2755 We generate the actual spill instructions during prologue generation. */
/* Records how many of the 8 argument slots remain after the named
   arguments, so the prologue knows how many incoming registers to spill
   for va_arg.  CUM is taken by value, so advancing it here does not
   affect the caller's copy.  */
2758 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2759 CUMULATIVE_ARGS cum;
2763 int second_time ATTRIBUTE_UNUSED;
2765 /* If this is a stdarg function, then skip the current argument. */
2766 if (! current_function_varargs)
2767 ia64_function_arg_advance (&cum, int_mode, type, 1);
2769 if (cum.words < MAX_ARGUMENT_SLOTS)
2771 int n = MAX_ARGUMENT_SLOTS - cum.words;
2772 *pretend_size = n * UNITS_PER_WORD;
2773 cfun->machine->n_varargs = n;
2777 /* Check whether TYPE is a homogeneous floating point aggregate. If
2778 it is, return the mode of the floating point type that appears
2779 in all leafs. If it is not, return VOIDmode.
2781 An aggregate is a homogeneous floating point aggregate is if all
2782 fields/elements in it have the same floating point type (e.g,
2783 SFmode). 128-bit quad-precision floats are excluded. */
/* NOTE(review): several branches (REAL_TYPE when not NESTED, the ARRAY /
   COMPLEX cases, and the default return) fall on listing lines not shown
   here — confirm against the full file.  */
2785 static enum machine_mode
2786 hfa_element_mode (type, nested)
2790 enum machine_mode element_mode = VOIDmode;
2791 enum machine_mode mode;
2792 enum tree_code code = TREE_CODE (type);
2793 int know_element_mode = 0;
/* Scalar, pointer and other non-FP types can never be HFA elements.  */
2798 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2799 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2800 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2801 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2805 /* Fortran complex types are supposed to be HFAs, so we need to handle
2806 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2809 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2810 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2811 * BITS_PER_UNIT, MODE_FLOAT, 0);
2816 /* ??? Should exclude 128-bit long double here. */
2817 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2818 mode if this is contained within an aggregate. */
2820 return TYPE_MODE (type);
2825 return TYPE_MODE (TREE_TYPE (type));
2829 case QUAL_UNION_TYPE:
/* Recurse over fields: every FIELD_DECL must resolve to the same FP
   mode, otherwise the aggregate is not homogeneous.  */
2830 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2832 if (TREE_CODE (t) != FIELD_DECL)
2835 mode = hfa_element_mode (TREE_TYPE (t), 1);
2836 if (know_element_mode)
2838 if (mode != element_mode)
2841 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2845 know_element_mode = 1;
2846 element_mode = mode;
2849 return element_mode;
2852 /* If we reach here, we probably have some front-end specific type
2853 that the backend doesn't know about. This can happen via the
2854 aggregate_value_p call in init_function_start. All we can do is
2855 ignore unknown tree types. */
2862 /* Return rtx for register where argument is passed, or zero if it is passed
2865 /* ??? 128-bit quad-precision floats are always passed in general
/* Implements the IA-64 calling convention for one argument:
   - arguments with alignment > 8 bytes are bumped to an even slot;
   - once the 8 slots are used the argument goes on the stack;
   - homogeneous FP aggregates (HFAs) are split across FR regs (and GR
     regs when unprototyped or when SFmode elements outrun the FRs),
     returned as a PARALLEL of EXPR_LISTs;
   - other FP values go in FR when named/prototyped, GR otherwise;
   - everything else goes in GRs starting at BASEREG.
   NOTE(review): the declarations of `offset`, `loc`, `i` and a few
   branch bodies fall in listing gaps — confirm against the full file.  */
2869 ia64_function_arg (cum, mode, type, named, incoming)
2870 CUMULATIVE_ARGS *cum;
2871 enum machine_mode mode;
/* INCOMING selects the callee-side (GR_ARG_FIRST) vs caller-side
   (AR_ARG_FIRST) register numbering.  */
2876 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2877 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2878 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2881 enum machine_mode hfa_mode = VOIDmode;
2883 /* Integer and float arguments larger than 8 bytes start at the next even
2884 boundary. Aggregates larger than 8 bytes start at the next even boundary
2885 if the aggregate has 16 byte alignment. Net effect is that types with
2886 alignment greater than 8 start at the next even boundary. */
2887 /* ??? The ABI does not specify how to handle aggregates with alignment from
2888 9 to 15 bytes, or greater than 16. We handle them all as if they had
2889 16 byte alignment. Such aggregates can occur only if gcc extensions are
2891 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2893 && (cum->words & 1))
2896 /* If all argument slots are used, then it must go on the stack. */
2897 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2900 /* Check for and handle homogeneous FP aggregates. */
2902 hfa_mode = hfa_element_mode (type, 0);
2904 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2905 and unprototyped hfas are passed specially. */
2906 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2910 int fp_regs = cum->fp_regs;
2911 int int_regs = cum->words + offset;
2912 int hfa_size = GET_MODE_SIZE (hfa_mode);
2916 /* If prototyped, pass it in FR regs then GR regs.
2917 If not prototyped, pass it in both FR and GR regs.
2919 If this is an SFmode aggregate, then it is possible to run out of
2920 FR regs while GR regs are still left. In that case, we pass the
2921 remaining part in the GR regs. */
2923 /* Fill the FP regs. We do this always. We stop if we reach the end
2924 of the argument, the last FP register, or the last argument slot. */
2926 byte_size = ((mode == BLKmode)
2927 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2928 args_byte_size = int_regs * UNITS_PER_WORD;
2930 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2931 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2933 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2934 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2938 args_byte_size += hfa_size;
2942 /* If no prototype, then the whole thing must go in GR regs. */
2943 if (! cum->prototype)
2945 /* If this is an SFmode aggregate, then we might have some left over
2946 that needs to go in GR regs. */
2947 else if (byte_size != offset)
2948 int_regs += offset / UNITS_PER_WORD;
2950 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2952 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2954 enum machine_mode gr_mode = DImode;
2956 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2957 then this goes in a GR reg left adjusted/little endian, right
2958 adjusted/big endian. */
2959 /* ??? Currently this is handled wrong, because 4-byte hunks are
2960 always right adjusted/little endian. */
2963 /* If we have an even 4 byte hunk because the aggregate is a
2964 multiple of 4 bytes in size, then this goes in a GR reg right
2965 adjusted/little endian. */
2966 else if (byte_size - offset == 4)
2968 /* Complex floats need to have float mode. */
2969 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2972 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2973 gen_rtx_REG (gr_mode, (basereg
2976 offset += GET_MODE_SIZE (gr_mode);
2977 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
2978 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
2981 /* If we ended up using just one location, just return that one loc. */
2983 return XEXP (loc[0], 0);
2985 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2988 /* Integral and aggregates go in general registers. If we have run out of
2989 FR registers, then FP values must also go in general registers. This can
2990 happen when we have a SFmode HFA. */
2991 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
2992 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
2993 return gen_rtx_REG (mode, basereg + cum->words + offset);
2995 /* If there is a prototype, then FP values go in a FR register when
2996 named, and in a GR registeer when unnamed. */
2997 else if (cum->prototype)
3000 return gen_rtx_REG (mode, basereg + cum->words + offset);
3002 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3004 /* If there is no prototype, then FP values go in both FR and GR
/* Unprototyped FP scalar: emit a two-element PARALLEL so both the FR
   and the GR copy are set up for the callee.  */
3008 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3009 gen_rtx_REG (mode, (FR_ARG_FIRST
3012 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3014 (basereg + cum->words
3018 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3022 /* Return number of words, at the beginning of the argument, that must be
3023 put in registers. 0 is the argument is entirely in registers or entirely
/* Computes how many words of an argument straddle the register/stack
   boundary: zero when it fits entirely in the 8 slots (or is entirely on
   the stack), otherwise the number of remaining register slots.  */
3027 ia64_function_arg_partial_nregs (cum, mode, type, named)
3028 CUMULATIVE_ARGS *cum;
3029 enum machine_mode mode;
3031 int named ATTRIBUTE_UNUSED;
3033 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3034 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3038 /* Arguments with alignment larger than 8 bytes start at the next even
3040 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3042 && (cum->words & 1))
3045 /* If all argument slots are used, then it must go on the stack. */
3046 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3049 /* It doesn't matter whether the argument goes in FR or GR regs. If
3050 it fits within the 8 argument slots, then it goes entirely in
3051 registers. If it extends past the last argument slot, then the rest
3052 goes on the stack. */
3054 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3057 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3060 /* Update CUM to point after this argument. This is patterned after
3061 ia64_function_arg. */
/* Mirrors the slot/FP-register accounting of ia64_function_arg without
   building any rtl: bumps cum->words past the argument (including the
   even-alignment pad) and advances cum->fp_regs for HFA elements and FP
   scalars by the same rules used when the argument was assigned.  */
3064 ia64_function_arg_advance (cum, mode, type, named)
3065 CUMULATIVE_ARGS *cum;
3066 enum machine_mode mode;
3070 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3071 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3074 enum machine_mode hfa_mode = VOIDmode;
3076 /* If all arg slots are already full, then there is nothing to do. */
3077 if (cum->words >= MAX_ARGUMENT_SLOTS)
3080 /* Arguments with alignment larger than 8 bytes start at the next even
3082 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3084 && (cum->words & 1))
3087 cum->words += words + offset;
3089 /* Check for and handle homogeneous FP aggregates. */
3091 hfa_mode = hfa_element_mode (type, 0);
3093 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3094 and unprototyped hfas are passed specially. */
3095 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3097 int fp_regs = cum->fp_regs;
3098 /* This is the original value of cum->words + offset. */
3099 int int_regs = cum->words - words;
3100 int hfa_size = GET_MODE_SIZE (hfa_mode);
3104 /* If prototyped, pass it in FR regs then GR regs.
3105 If not prototyped, pass it in both FR and GR regs.
3107 If this is an SFmode aggregate, then it is possible to run out of
3108 FR regs while GR regs are still left. In that case, we pass the
3109 remaining part in the GR regs. */
3111 /* Fill the FP regs. We do this always. We stop if we reach the end
3112 of the argument, the last FP register, or the last argument slot. */
3114 byte_size = ((mode == BLKmode)
3115 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3116 args_byte_size = int_regs * UNITS_PER_WORD;
/* Count only — no rtl is generated here, unlike ia64_function_arg.  */
3118 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3119 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3122 args_byte_size += hfa_size;
3126 cum->fp_regs = fp_regs;
3129 /* Integral and aggregates go in general registers. If we have run out of
3130 FR registers, then FP values must also go in general registers. This can
3131 happen when we have a SFmode HFA. */
3132 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3135 /* If there is a prototype, then FP values go in a FR register when
3136 named, and in a GR registeer when unnamed. */
3137 else if (cum->prototype)
3142 /* ??? Complex types should not reach here. */
3143 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3145 /* If there is no prototype, then FP values go in both FR and GR
3148 /* ??? Complex types should not reach here. */
3149 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3154 /* Implement va_start. */
/* Adjusts NEXTARG by -UNITS_PER_WORD when all named arguments already
   filled the register slots, then defers to the generic expander.
   NOTE(review): intermediate lines (declarations, additional offset
   logic) fall in listing gaps — confirm against the full file.  */
3157 ia64_va_start (stdarg_p, valist, nextarg)
3165 arg_words = current_function_args_info.words;
3170 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3172 nextarg = plus_constant (nextarg, ofs);
3173 std_expand_builtin_va_start (1, valist, nextarg);
3176 /* Implement va_arg. */
/* For types with alignment above 8 bytes, round VALIST up to the next
   16-byte (2-word) boundary with tree arithmetic before delegating to the
   generic va_arg expander.  */
3179 ia64_va_arg (valist, type)
3184 /* Arguments with alignment larger than 8 bytes start at the next even
3186 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
/* valist = (valist + 2*UNITS_PER_WORD - 1) & -(2*UNITS_PER_WORD)  */
3188 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3189 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3190 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3191 build_int_2 (-2 * UNITS_PER_WORD, -1));
3192 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3193 TREE_SIDE_EFFECTS (t) = 1;
3194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3197 return std_expand_builtin_va_arg (valist, type);
3200 /* Return 1 if function return value returned in memory. Return 0 if it is
/* HFAs of up to MAX_ARGUMENT_SLOTS elements come back in FP registers;
   other values fit in registers up to UNITS_PER_WORD *
   MAX_INT_RETURN_SLOTS bytes; anything larger is returned in memory.
   NOTE(review): the actual `return 0;`/`return 1;` lines fall in listing
   gaps — confirm against the full file.  */
3204 ia64_return_in_memory (valtype)
3207 enum machine_mode mode;
3208 enum machine_mode hfa_mode;
3209 HOST_WIDE_INT byte_size;
3211 mode = TYPE_MODE (valtype);
3212 byte_size = GET_MODE_SIZE (mode);
3213 if (mode == BLKmode)
/* BLKmode has no meaningful mode size; measure the type instead.  */
3215 byte_size = int_size_in_bytes (valtype);
3220 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3222 hfa_mode = hfa_element_mode (valtype, 0);
3223 if (hfa_mode != VOIDmode)
3225 int hfa_size = GET_MODE_SIZE (hfa_mode);
3227 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3232 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3238 /* Return rtx for register that holds the function return value. */
/* HFAs are returned as a PARALLEL of consecutive FR registers starting at
   FR_ARG_FIRST (one per element); other FP types come back in FR_ARG_FIRST
   (except TFmode without INTEL_EXTENDED_IEEE_FORMAT); everything else in
   GR_RET_FIRST.  */
3241 ia64_function_value (valtype, func)
3243 tree func ATTRIBUTE_UNUSED;
3245 enum machine_mode mode;
3246 enum machine_mode hfa_mode;
3248 mode = TYPE_MODE (valtype);
3249 hfa_mode = hfa_element_mode (valtype, 0);
3251 if (hfa_mode != VOIDmode)
3259 hfa_size = GET_MODE_SIZE (hfa_mode);
3260 byte_size = ((mode == BLKmode)
3261 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
/* One EXPR_LIST per HFA element, each in the next FR register.  */
3263 for (i = 0; offset < byte_size; i++)
3265 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3266 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3272 return XEXP (loc[0], 0);
3274 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3276 else if (FLOAT_TYPE_P (valtype) &&
3277 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3278 return gen_rtx_REG (mode, FR_ARG_FIRST);
3280 return gen_rtx_REG (mode, GR_RET_FIRST);
3283 /* Print a memory address as an operand to reference that memory location. */
3285 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3286 also call this from ia64_print_operand for memory addresses. */
/* Deliberately empty: both parameters are ATTRIBUTE_UNUSED and nothing
   is emitted.  */
3289 ia64_print_operand_address (stream, address)
3290 FILE * stream ATTRIBUTE_UNUSED;
3291 rtx address ATTRIBUTE_UNUSED;
3295 /* Print an operand to an assembler instruction.
3296 C Swap and print a comparison operator.
3297 D Print an FP comparison operator.
3298 E Print 32 - constant, for SImode shifts as extract.
3299 e Print 64 - constant, for DImode rotates.
3300 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3301 a floating point register emitted normally.
3302 I Invert a predicate register by adding 1.
3303 J Select the proper predicate register for a condition.
3304 j Select the inverse predicate register for a condition.
3305 O Append .acq for volatile load.
3306 P Postincrement of a MEM.
3307 Q Append .rel for volatile store.
3308 S Shift amount for shladd instruction.
3309 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3310 for Intel assembler.
3311 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3312 for Intel assembler.
3313 r Print register name, or constant 0 as r0. HP compatibility for
/* Dispatches on CODE per the table above, then falls through to the
   generic printer for REG / MEM / constants.
   NOTE(review): many case labels, `break`s and intermediate lines fall in
   listing gaps throughout this function — confirm any edit against the
   full file.  */
3316 ia64_print_operand (file, x, code)
3326 /* Handled below. */
/* 'C': print the comparison with its operands swapped.  */
3331 enum rtx_code c = swap_condition (GET_CODE (x));
3332 fputs (GET_RTX_NAME (c), file);
3337 switch (GET_CODE (x))
3349 str = GET_RTX_NAME (GET_CODE (x));
3356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
/* 'F': FP constant 0.0 -> f0, 1.0 -> f1, else the register name.  */
3364 if (x == CONST0_RTX (GET_MODE (x)))
3365 str = reg_names [FR_REG (0)];
3366 else if (x == CONST1_RTX (GET_MODE (x)))
3367 str = reg_names [FR_REG (1)];
3368 else if (GET_CODE (x) == REG)
3369 str = reg_names [REGNO (x)];
/* 'I': the inverse predicate is the odd partner, regno + 1.  */
3376 fputs (reg_names [REGNO (x) + 1], file);
3382 unsigned int regno = REGNO (XEXP (x, 0));
3383 if (GET_CODE (x) == EQ)
3387 fputs (reg_names [regno], file);
/* 'O': volatile loads get an acquire completer.  */
3392 if (MEM_VOLATILE_P (x))
3393 fputs(".acq", file);
/* 'P': emit the post-increment amount of an auto-modified MEM.  */
3398 HOST_WIDE_INT value;
3400 switch (GET_CODE (XEXP (x, 0)))
3406 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3407 if (GET_CODE (x) == CONST_INT)
3409 else if (GET_CODE (x) == REG)
3411 fprintf (file, ", %s", reg_names[REGNO (x)]);
3419 value = GET_MODE_SIZE (GET_MODE (x));
3423 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
/* 'Q': volatile stores get a release completer.  */
3434 if (MEM_VOLATILE_P (x))
3435 fputs(".rel", file);
/* 'S': shladd takes the log2 of the multiplier as shift count.  */
3439 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3443 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3445 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
/* 'U': like 'T' but 64-bit; negative values get the 0xffffffff
   high-word prefix for the Intel assembler.  */
3451 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3453 const char *prefix = "0x";
3454 if (INTVAL (x) & 0x80000000)
3456 fprintf (file, "0xffffffff");
3459 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3465 /* If this operand is the constant zero, write it as register zero.
3466 Any register, zero, or CONST_INT value is OK here. */
3467 if (GET_CODE (x) == REG)
3468 fputs (reg_names[REGNO (x)], file);
3469 else if (x == CONST0_RTX (GET_MODE (x)))
3471 else if (GET_CODE (x) == CONST_INT)
3472 output_addr_const (file, x);
3474 output_operand_lossage ("invalid %%r value");
3481 /* For conditional branches, returns or calls, substitute
3482 sptk, dptk, dpnt, or spnt for %s. */
3483 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3486 int pred_val = INTVAL (XEXP (x, 0));
3488 /* Guess top and bottom 10% statically predicted. */
3489 if (pred_val < REG_BR_PROB_BASE / 50)
3491 else if (pred_val < REG_BR_PROB_BASE / 2)
3493 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3498 else if (GET_CODE (current_output_insn) == CALL_INSN)
3503 fputs (which, file);
/* '+'-style predicate prefix from the insn's predicate, if any.  */
3508 x = current_insn_predicate;
3511 unsigned int regno = REGNO (XEXP (x, 0));
3512 if (GET_CODE (x) == EQ)
3514 fprintf (file, "(%s) ", reg_names [regno]);
3519 output_operand_lossage ("ia64_print_operand: unknown code");
/* No (or handled) code letter: print the operand generically.  */
3523 switch (GET_CODE (x))
3525 /* This happens for the spill/restore instructions. */
3530 /* ... fall through ... */
3533 fputs (reg_names [REGNO (x)], file);
/* MEM: strip an autoinc wrapper and print "[reg]".  */
3538 rtx addr = XEXP (x, 0);
3539 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3540 addr = XEXP (addr, 0);
3541 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3546 output_addr_const (file, x);
3553 /* Calulate the cost of moving data from a register in class FROM to
3554 one in class TO, using MODE. */
/* Costs are symmetric; ADDL_REGS is folded into GR_REGS, then the lower-
   numbered class is made the destination so each pair is handled once.
   Classes only reachable through GRs (PR/BR/AR) report MEMORY_MOVE_COST
   for impossible direct moves so reload inserts an intermediate.
   NOTE(review): the numeric return values and several case labels fall in
   listing gaps — confirm against the full file.  */
3557 ia64_register_move_cost (mode, from, to)
3558 enum machine_mode mode;
3559 enum reg_class from, to;
3561 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3562 if (to == ADDL_REGS)
3564 if (from == ADDL_REGS)
3567 /* All costs are symmetric, so reduce cases by putting the
3568 lower number class as the destination. */
3571 enum reg_class tmp = to;
3572 to = from, from = tmp;
3575 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3576 so that we get secondary memory reloads. Between FR_REGS,
3577 we have to make this at least as expensive as MEMORY_MOVE_COST
3578 to avoid spectacularly poor register class preferencing. */
3581 if (to != GR_REGS || from != GR_REGS)
3582 return MEMORY_MOVE_COST (mode, to, 0);
3590 /* Moving between PR registers takes two insns. */
3591 if (from == PR_REGS)
3593 /* Moving between PR and anything but GR is impossible. */
3594 if (from != GR_REGS)
3595 return MEMORY_MOVE_COST (mode, to, 0);
3599 /* Moving between BR and anything but GR is impossible. */
3600 if (from != GR_REGS && from != GR_AND_BR_REGS)
3601 return MEMORY_MOVE_COST (mode, to, 0);
3606 /* Moving between AR and anything but GR is impossible. */
3607 if (from != GR_REGS)
3608 return MEMORY_MOVE_COST (mode, to, 0);
3613 case GR_AND_FR_REGS:
3614 case GR_AND_BR_REGS:
3625 /* This function returns the register class required for a secondary
3626 register when copying between one of the registers in CLASS, and X,
3627 using MODE. A return value of NO_REGS means that no secondary register
/* Most special classes (BR/AR/FR/PR) can only be reached through general
   registers, so a GR intermediate is requested whenever X is not already
   a GR (or a constant/MEM case noted below).
   NOTE(review): the case labels, the returned class names and the
   trailing two-word case fall in listing gaps — confirm against the full
   file.  */
3631 ia64_secondary_reload_class (class, mode, x)
3632 enum reg_class class;
3633 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Resolve a (possibly SUBREG-wrapped) pseudo to its hard regno, if any.  */
3638 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3639 regno = true_regnum (x);
3646 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3647 interaction. We end up with two pseudos with overlapping lifetimes
3648 both of which are equiv to the same constant, and both which need
3649 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3650 changes depending on the path length, which means the qty_first_reg
3651 check in make_regs_eqv can give different answers at different times.
3652 At some point I'll probably need a reload_indi pattern to handle
3655 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3656 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3657 non-general registers for good measure. */
3658 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3661 /* This is needed if a pseudo used as a call_operand gets spilled to a
3663 if (GET_CODE (x) == MEM)
3668 /* Need to go through general regsters to get to other class regs. */
3669 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3672 /* This can happen when a paradoxical subreg is an operand to the
3674 /* ??? This shouldn't be necessary after instruction scheduling is
3675 enabled, because paradoxical subregs are not accepted by
3676 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3677 stop the paradoxical subreg stupidity in the *_operand functions
3679 if (GET_CODE (x) == MEM
3680 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3681 || GET_MODE (x) == QImode))
3684 /* This can happen because of the ior/and/etc patterns that accept FP
3685 registers as operands. If the third operand is a constant, then it
3686 needs to be reloaded into a FP register. */
3687 if (GET_CODE (x) == CONST_INT)
3690 /* This can happen because of register elimination in a muldi3 insn.
3691 E.g. `26107 * (unsigned long)&u'. */
3692 if (GET_CODE (x) == PLUS)
3697 /* ??? This happens if we cse/gcse a BImode value across a call,
3698 and the function has a nonlocal goto. This is because global
3699 does not allocate call crossing pseudos to hard registers when
3700 current_function_has_nonlocal_goto is true. This is relatively
3701 common for C++ programs that use exceptions. To reproduce,
3702 return NO_REGS and compile libstdc++. */
3703 if (GET_CODE (x) == MEM)
3706 /* This can happen when we take a BImode subreg of a DImode value,
3707 and that DImode value winds up in some non-GR register. */
3708 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3713 /* Since we have no offsettable memory addresses, we need a temporary
3714 to hold the address of the second word. */
3727 /* Emit text to declare externally defined variables and functions, because
3728 the Intel assembler does not support undefined externals. */
/* K&R-style definition; parameters are presumably FILE *file, tree decl,
   const char *name — the declaration lines are elided in this listing.
   TODO(review): confirm against the full source. */
3731 ia64_asm_output_external (file, decl, name)
3736 int save_referenced;
3738 /* GNU as does not need anything here. */
3742 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3743 the linker when we do this, so we need to be careful not to do this for
3744 builtin functions which have no library equivalent. Unfortunately, we
3745 can't tell here whether or not a function will actually be called by
3746 expand_expr, so we pull in library functions even if we may not need
/* Skip a small set of builtins that never have a library equivalent;
   emitting a reference for these would create unsatisfiable externals.  */
3748 if (! strcmp (name, "__builtin_next_arg")
3749 || ! strcmp (name, "alloca")
3750 || ! strcmp (name, "__builtin_constant_p")
3751 || ! strcmp (name, "__builtin_args_info"))
3754 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3756 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
/* For functions, also emit a .type directive marking the symbol as a
   function, as the Intel assembler expects.  */
3757 if (TREE_CODE (decl) == FUNCTION_DECL)
3759 fprintf (file, "%s", TYPE_ASM_OP);
3760 assemble_name (file, name);
3762 fprintf (file, TYPE_OPERAND_FMT, "function");
3765 ASM_GLOBALIZE_LABEL (file, name);
/* Restore the flag so that assemble_name's side effect above does not
   leak out of this routine.  */
3766 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3769 /* Parse the -mfixed-range= option string. */
3772 fix_range (const_str)
3773 const char *const_str;
3776 char *str, *dash, *comma;
3778 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3779 REG2 are either register names or register numbers. The effect
3780 of this option is to mark the registers in the range from REG1 to
3781 REG2 as ``fixed'' so they won't be used by the compiler. This is
3782 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
/* Take a writable copy of the option string; decode_reg_name needs
   NUL-terminated pieces, and the elided code presumably splits the
   copy in place at '-' and ',' — TODO confirm in full source.  */
3784 i = strlen (const_str);
3785 str = (char *) alloca (i + 1);
3786 memcpy (str, const_str, i + 1);
3790 dash = strchr (str, '-');
3793 warning ("value of -mfixed-range must have form REG1-REG2");
3798 comma = strchr (dash + 1, ',');
/* Translate both endpoint names into hard register numbers, warning on
   anything decode_reg_name rejects.  */
3802 first = decode_reg_name (str);
3805 warning ("unknown register name: %s", str);
3809 last = decode_reg_name (dash + 1);
3812 warning ("unknown register name: %s", dash + 1);
3820 warning ("%s-%s is an empty range", str, dash + 1);
/* Mark every register in [first, last] as fixed and call-used so the
   allocator never touches it.  */
3824 for (i = first; i <= last; ++i)
3825 fixed_regs[i] = call_used_regs[i] = 1;
3835 /* Called to register all of our global variables with the garbage
/* Registers the two saved compare-operand rtx globals as GC roots so the
   garbage collector keeps them alive across collections.  */
3839 ia64_add_gc_roots ()
3841 ggc_add_rtx_root (&ia64_compare_op0, 1);
3842 ggc_add_rtx_root (&ia64_compare_op1, 1);
/* Per-function initialization hook (installed via init_machine_status in
   ia64_override_options): allocates a zeroed machine_function record.
   P is presumably the struct function being set up — declaration lines
   are elided in this listing.  */
3846 ia64_init_machine_status (p)
3850 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
/* GC mark hook (installed via mark_machine_status): marks the rtx values
   cached in P's machine_function so they survive garbage collection.  */
3854 ia64_mark_machine_status (p)
3857 struct machine_function *machine = p->machine;
3861 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3862 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3863 ggc_mark_rtx (machine->ia64_gp_save);
3868 ia64_free_machine_status (p)
3875 /* Handle TARGET_OPTIONS switches. */
3878 ia64_override_options ()
/* -mauto-pic implies a constant gp, so force -mconstant-gp on.  */
3880 if (TARGET_AUTO_PIC)
3881 target_flags |= MASK_CONST_GP;
/* The two inline-division strategies are mutually exclusive; prefer
   latency and drop the throughput flag.  */
3883 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3885 warning ("cannot optimize division for both latency and throughput");
3886 target_flags &= ~MASK_INLINE_DIV_THR;
3889 if (ia64_fixed_range_string)
3890 fix_range (ia64_fixed_range_string);
/* Take over the second scheduling pass ourselves: remember whether it
   was requested, then disable the generic pass.  */
3892 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3893 flag_schedule_insns_after_reload = 0;
3895 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
/* Install the per-function machine_function lifecycle hooks.  */
3897 init_machine_status = ia64_init_machine_status;
3898 mark_machine_status = ia64_mark_machine_status;
3899 free_machine_status = ia64_free_machine_status;
3901 ia64_add_gc_roots ();
/* Forward declarations for the "safe" attribute accessors below, which
   tolerate unrecognizable insns.  */
3904 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3905 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3906 static enum attr_type ia64_safe_type PARAMS((rtx));
/* Like get_attr_itanium_requires_unit0, but returns a safe default
   (ITANIUM_REQUIRES_UNIT0_NO) when INSN cannot be recognized.  */
3908 static enum attr_itanium_requires_unit0
3909 ia64_safe_itanium_requires_unit0 (insn)
3912 if (recog_memoized (insn) >= 0)
3913 return get_attr_itanium_requires_unit0 (insn);
3915 return ITANIUM_REQUIRES_UNIT0_NO;
/* Like get_attr_itanium_class, but returns ITANIUM_CLASS_UNKNOWN when
   INSN cannot be recognized.  */
3918 static enum attr_itanium_class
3919 ia64_safe_itanium_class (insn)
3922 if (recog_memoized (insn) >= 0)
3923 return get_attr_itanium_class (insn);
3925 return ITANIUM_CLASS_UNKNOWN;
/* Like get_attr_type, but returns TYPE_UNKNOWN when INSN cannot be
   recognized.  */
3928 static enum attr_type
3929 ia64_safe_type (insn)
3932 if (recog_memoized (insn) >= 0)
3933 return get_attr_type (insn);
3935 return TYPE_UNKNOWN;
3938 /* The following collection of routines emit instruction group stop bits as
3939 necessary to avoid dependencies. */
3941 /* Need to track some additional registers as far as serialization is
3942 concerned so we can properly handle br.call and br.ret. We could
3943 make these registers visible to gcc, but since these registers are
3944 never explicitly used in gcc generated code, it seems wasteful to
3945 do so (plus it would make the call and return patterns needlessly
3947 #define REG_GP (GR_REG (1))
3948 #define REG_RP (BR_REG (0))
/* Pseudo slots past FIRST_PSEUDO_REGISTER used only by this dependency
   tracker — they are not real gcc registers.  */
3949 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3950 /* This is used for volatile asms which may require a stop bit immediately
3951 before and after them. */
3952 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
/* The 64 UNAT bits are tracked individually, so the table needs 64 slots
   starting at AR_UNAT_BIT_0.  */
3953 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3954 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3956 /* For each register, we keep track of how it has been written in the
3957 current instruction group.
3959 If a register is written unconditionally (no qualifying predicate),
3960 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3962 If a register is written if its qualifying predicate P is true, we
3963 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3964 may be written again by the complement of P (P^1) and when this happens,
3965 WRITE_COUNT gets set to 2.
3967 The result of this is that whenever an insn attempts to write a register
3968 whose WRITE_COUNT is two, we need to issue a insn group barrier first.
3970 If a predicate register is written by a floating-point insn, we set
3971 WRITTEN_BY_FP to true.
3973 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3974 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3976 struct reg_write_state
3978 unsigned int write_count : 2;
3979 unsigned int first_pred : 16;
3980 unsigned int written_by_fp : 1;
3981 unsigned int written_by_and : 1;
3982 unsigned int written_by_or : 1;
3985 /* Cumulative info for the current instruction group. */
3986 struct reg_write_state rws_sum[NUM_REGS];
3987 /* Info for the current instruction. This gets copied to rws_sum after a
3988 stop bit is emitted. */
3989 struct reg_write_state rws_insn[NUM_REGS];
3991 /* Indicates whether this is the first instruction after a stop bit,
3992 in which case we don't need another stop bit. Without this, we hit
3993 the abort in ia64_variable_issue when scheduling an alloc. */
3994 static int first_instruction;
3996 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3997 RTL for one instruction. */
/* Bit-field members of struct reg_flags (the struct header line is elided
   in this listing).  */
4000 unsigned int is_write : 1; /* Is register being written? */
4001 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4002 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4003 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4004 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4005 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
/* Forward declarations for the stop-bit analysis routines below.  */
4008 static void rws_update PARAMS ((struct reg_write_state *, int,
4009 struct reg_flags, int));
4010 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4011 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4012 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4013 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4014 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4015 static void init_insn_group_barriers PARAMS ((void));
4016 static int group_barrier_needed_p PARAMS ((rtx));
4017 static int safe_group_barrier_needed_p PARAMS ((rtx));
4019 /* Update *RWS for REGNO, which is being written by the current instruction,
4020 with predicate PRED, and associated register flags in FLAGS. */
4023 rws_update (rws, regno, flags, pred)
4024 struct reg_write_state *rws;
4026 struct reg_flags flags;
/* A predicated write bumps write_count; an unpredicated one (the branch
   for which is elided in this listing) presumably saturates it at 2.  */
4030 rws[regno].write_count++;
4032 rws[regno].write_count = 2;
4033 rws[regno].written_by_fp |= flags.is_fp;
4034 /* ??? Not tracking and/or across differing predicates. */
4035 rws[regno].written_by_and = flags.is_and;
4036 rws[regno].written_by_or = flags.is_or;
4037 rws[regno].first_pred = pred;
4040 /* Handle an access to register REGNO of type FLAGS using predicate register
4041 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4042 a dependency with an earlier instruction in the same group. */
4045 rws_access_regno (regno, flags, pred)
4047 struct reg_flags flags;
4050 int need_barrier = 0;
4052 if (regno >= NUM_REGS)
/* AND/OR parallel-compare tracking only applies to predicate regs.  */
4055 if (! PR_REGNO_P (regno))
4056 flags.is_and = flags.is_or = 0;
/* --- Write path (the is_write test itself is elided in this listing) --- */
4062 /* One insn writes same reg multiple times? */
4063 if (rws_insn[regno].write_count > 0)
4066 /* Update info for current instruction. */
4067 rws_update (rws_insn, regno, flags, pred);
4068 write_count = rws_sum[regno].write_count;
4070 switch (write_count)
4073 /* The register has not been written yet. */
4074 rws_update (rws_sum, regno, flags, pred);
4078 /* The register has been written via a predicate. If this is
4079 not a complementary predicate, then we need a barrier. */
4080 /* ??? This assumes that P and P+1 are always complementary
4081 predicates for P even. */
4082 if (flags.is_and && rws_sum[regno].written_by_and)
4084 else if (flags.is_or && rws_sum[regno].written_by_or)
4086 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4088 rws_update (rws_sum, regno, flags, pred);
4092 /* The register has been unconditionally written already. We
4094 if (flags.is_and && rws_sum[regno].written_by_and)
4096 else if (flags.is_or && rws_sum[regno].written_by_or)
4100 rws_sum[regno].written_by_and = flags.is_and;
4101 rws_sum[regno].written_by_or = flags.is_or;
/* --- Read path: RAW checks against the group's accumulated writes --- */
4110 if (flags.is_branch)
4112 /* Branches have several RAW exceptions that allow to avoid
4115 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4116 /* RAW dependencies on branch regs are permissible as long
4117 as the writer is a non-branch instruction. Since we
4118 never generate code that uses a branch register written
4119 by a branch instruction, handling this case is
4123 if (REGNO_REG_CLASS (regno) == PR_REGS
4124 && ! rws_sum[regno].written_by_fp)
4125 /* The predicates of a branch are available within the
4126 same insn group as long as the predicate was written by
4127 something other than a floating-point instruction. */
4131 if (flags.is_and && rws_sum[regno].written_by_and)
4133 if (flags.is_or && rws_sum[regno].written_by_or)
4136 switch (rws_sum[regno].write_count)
4139 /* The register has not been written yet. */
4143 /* The register has been written via a predicate. If this is
4144 not a complementary predicate, then we need a barrier. */
4145 /* ??? This assumes that P and P+1 are always complementary
4146 predicates for P even. */
4147 if ((rws_sum[regno].first_pred ^ 1) != pred)
4152 /* The register has been unconditionally written already. We
4162 return need_barrier;
/* Like rws_access_regno, but takes a REG rtx and handles multi-word hard
   registers by visiting each constituent register number.  */
4166 rws_access_reg (reg, flags, pred)
4168 struct reg_flags flags;
4171 int regno = REGNO (reg);
4172 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
/* Single-register case: delegate directly.  */
4175 return rws_access_regno (regno, flags, pred);
4178 int need_barrier = 0;
/* Multi-register case: OR together the result for each covered regno
   (the loop header is elided in this listing).  */
4180 need_barrier |= rws_access_regno (regno + n, flags, pred);
4181 return need_barrier;
4185 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4186 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4189 update_set_flags (x, pflags, ppred, pcond)
4191 struct reg_flags *pflags;
4195 rtx src = SET_SRC (x);
4199 switch (GET_CODE (src))
4205 if (SET_DEST (x) == pc_rtx)
4206 /* X is a conditional branch. */
4210 int is_complemented = 0;
4212 /* X is a conditional move. */
4213 rtx cond = XEXP (src, 0);
/* (eq p 0) selects on the complement of predicate P.  */
4214 if (GET_CODE (cond) == EQ)
4215 is_complemented = 1;
4216 cond = XEXP (cond, 0);
4217 if (GET_CODE (cond) != REG
4218 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4221 if (XEXP (src, 1) == SET_DEST (x)
4222 || XEXP (src, 2) == SET_DEST (x))
4224 /* X is a conditional move that conditionally writes the
4227 /* We need another complement in this case. */
4228 if (XEXP (src, 1) == SET_DEST (x))
4229 is_complemented = ! is_complemented;
/* Export the predicate regno; odd numbers presumably denote the
   complemented predicate (lines elided in this listing).  */
4231 *ppred = REGNO (cond);
4232 if (is_complemented)
4236 /* ??? If this is a conditional write to the dest, then this
4237 instruction does not actually read one source. This probably
4238 doesn't matter, because that source is also the dest. */
4239 /* ??? Multiple writes to predicate registers are allowed
4240 if they are all AND type compares, or if they are all OR
4241 type compares. We do not generate such instructions
4244 /* ... fall through ... */
4247 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4248 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4249 /* Set pflags->is_fp to 1 so that we know we're dealing
4250 with a floating point comparison when processing the
4251 destination of the SET. */
4254 /* Discover if this is a parallel comparison. We only handle
4255 and.orcm and or.andcm at present, since we must retain a
4256 strict inverse on the predicate pair. */
4257 else if (GET_CODE (src) == AND)
4259 else if (GET_CODE (src) == IOR)
4266 /* Subroutine of rtx_needs_barrier; this function determines whether the
4267 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4268 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4272 set_src_needs_barrier (x, flags, pred, cond)
4274 struct reg_flags flags;
4278 int need_barrier = 0;
4280 rtx src = SET_SRC (x);
4282 if (GET_CODE (src) == CALL)
4283 /* We don't need to worry about the result registers that
4284 get written by subroutine call. */
4285 return rtx_needs_barrier (src, flags, pred);
4286 else if (SET_DEST (x) == pc_rtx)
4288 /* X is a conditional branch. */
4289 /* ??? This seems redundant, as the caller sets this bit for
4291 flags.is_branch = 1;
4292 return rtx_needs_barrier (src, flags, pred);
4295 need_barrier = rtx_needs_barrier (src, flags, pred);
4297 /* This instruction unconditionally uses a predicate register. */
4299 need_barrier |= rws_access_reg (cond, flags, 0);
/* A ZERO_EXTRACT destination also reads its position/width operands, so
   scan those before stripping down to the underlying location.  */
4302 if (GET_CODE (dst) == ZERO_EXTRACT)
4304 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4305 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4306 dst = XEXP (dst, 0);
4308 return need_barrier;
4311 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4312 Return 1 is this access creates a dependency with an earlier instruction
4313 in the same group. */
4316 rtx_needs_barrier (x, flags, pred)
4318 struct reg_flags flags;
4322 int is_complemented = 0;
4323 int need_barrier = 0;
4324 const char *format_ptr;
4325 struct reg_flags new_flags;
4333 switch (GET_CODE (x))
/* SET: analyze the source, then the destination as a write.  */
4336 update_set_flags (x, &new_flags, &pred, &cond);
4337 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4338 if (GET_CODE (SET_SRC (x)) != CALL)
4340 new_flags.is_write = 1;
4341 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
/* CALL: model the implicit reads/writes of br.call (ar.ec, rp, ar.pfs,
   CFM).  */
4346 new_flags.is_write = 0;
4347 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4349 /* Avoid multiple register writes, in case this is a pattern with
4350 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4351 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4353 new_flags.is_write = 1;
4354 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4355 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4356 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4361 /* X is a predicated instruction. */
4363 cond = COND_EXEC_TEST (x);
4366 need_barrier = rtx_needs_barrier (cond, flags, 0);
4368 if (GET_CODE (cond) == EQ)
4369 is_complemented = 1;
4370 cond = XEXP (cond, 0);
4371 if (GET_CODE (cond) != REG
4372 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4374 pred = REGNO (cond);
4375 if (is_complemented)
4378 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4379 return need_barrier;
4383 /* Clobber & use are for earlier compiler-phases only. */
4388 /* We always emit stop bits for traditional asms. We emit stop bits
4389 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4390 if (GET_CODE (x) != ASM_OPERANDS
4391 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4393 /* Avoid writing the register multiple times if we have multiple
4394 asm outputs. This avoids an abort in rws_access_reg. */
4395 if (! rws_insn[REG_VOLATILE].write_count)
4397 new_flags.is_write = 1;
4398 rws_access_regno (REG_VOLATILE, new_flags, pred);
4403 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4404 We can not just fall through here since then we would be confused
4405 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4406 traditional asms unlike their normal usage. */
4408 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4409 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
/* PARALLEL: first pass handles SET sources and USE/CALL/ASM elements.  */
4414 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4416 rtx pat = XVECEXP (x, 0, i);
4417 if (GET_CODE (pat) == SET)
4419 update_set_flags (pat, &new_flags, &pred, &cond);
4420 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4422 else if (GET_CODE (pat) == USE
4423 || GET_CODE (pat) == CALL
4424 || GET_CODE (pat) == ASM_OPERANDS)
4425 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4426 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
/* Second pass handles SET destinations and CLOBBER/RETURN elements.  */
4429 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4431 rtx pat = XVECEXP (x, 0, i);
4432 if (GET_CODE (pat) == SET)
4434 if (GET_CODE (SET_SRC (pat)) != CALL)
4436 new_flags.is_write = 1;
4437 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4441 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4442 need_barrier |= rtx_needs_barrier (pat, flags, pred);
/* REG: an access to ar.unat touches all 64 tracked UNAT bits.  */
4450 if (REGNO (x) == AR_UNAT_REGNUM)
4452 for (i = 0; i < 64; ++i)
4453 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4456 need_barrier = rws_access_reg (x, flags, pred);
4460 /* Find the regs used in memory address computation. */
4461 new_flags.is_write = 0;
4462 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4465 case CONST_INT: case CONST_DOUBLE:
4466 case SYMBOL_REF: case LABEL_REF: case CONST:
4469 /* Operators with side-effects. */
4470 case POST_INC: case POST_DEC:
4471 if (GET_CODE (XEXP (x, 0)) != REG)
/* Auto-inc addressing both reads and writes the base register.  */
4474 new_flags.is_write = 0;
4475 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4476 new_flags.is_write = 1;
4477 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4481 if (GET_CODE (XEXP (x, 0)) != REG)
4484 new_flags.is_write = 0;
4485 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4486 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4487 new_flags.is_write = 1;
4488 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4491 /* Handle common unary and binary ops for efficiency. */
4492 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4493 case MOD: case UDIV: case UMOD: case AND: case IOR:
4494 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4495 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4496 case NE: case EQ: case GE: case GT: case LE:
4497 case LT: case GEU: case GTU: case LEU: case LTU:
4498 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4499 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4502 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4503 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4504 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4505 case SQRT: case FFS:
4506 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
/* UNSPEC: dispatch on the unspec number; the comments name the insn each
   number corresponds to in the machine description.  */
4510 switch (XINT (x, 1))
4512 case 1: /* st8.spill */
4513 case 2: /* ld8.fill */
4515 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4516 HOST_WIDE_INT bit = (offset >> 3) & 63;
4518 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4519 new_flags.is_write = (XINT (x, 1) == 1);
4520 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4525 case 3: /* stf.spill */
4526 case 4: /* ldf.spill */
4527 case 8: /* popcnt */
4528 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4531 case 7: /* pred_rel_mutex */
4532 case 9: /* pic call */
4534 case 19: /* fetchadd_acq */
4535 case 20: /* mov = ar.bsp */
4536 case 21: /* flushrs */
4537 case 22: /* bundle selector */
4538 case 23: /* cycle display */
4541 case 24: /* addp4 */
4542 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4545 case 5: /* recip_approx */
4546 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4547 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4550 case 13: /* cmpxchg_acq */
4551 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4552 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4560 case UNSPEC_VOLATILE:
4561 switch (XINT (x, 1))
4564 /* Alloc must always be the first instruction of a group.
4565 We force this by always returning true. */
4566 /* ??? We might get better scheduling if we explicitly check for
4567 input/local/output register dependencies, and modify the
4568 scheduler so that alloc is always reordered to the start of
4569 the current group. We could then eliminate all of the
4570 first_instruction code. */
4571 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4573 new_flags.is_write = 1;
4574 rws_access_regno (REG_AR_CFM, new_flags, pred);
4577 case 1: /* blockage */
4578 case 2: /* insn group barrier */
4581 case 5: /* set_bsp */
4585 case 7: /* pred.rel.mutex */
4586 case 8: /* safe_across_calls all */
4587 case 9: /* safe_across_calls normal */
/* RETURN (elided case label): br.ret reads rp and ar.pfs and writes
   ar.ec and the CFM.  */
4596 new_flags.is_write = 0;
4597 need_barrier = rws_access_regno (REG_RP, flags, pred);
4598 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4600 new_flags.is_write = 1;
4601 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4602 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
/* Default: walk the rtx generically using its format string.  */
4606 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4607 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4608 switch (format_ptr[i])
4610 case '0': /* unused field */
4611 case 'i': /* integer */
4612 case 'n': /* note */
4613 case 'w': /* wide integer */
4614 case 's': /* pointer to string */
4615 case 'S': /* optional pointer to string */
4619 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4624 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4625 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4634 return need_barrier;
4637 /* Clear out the state for group_barrier_needed_p at the start of a
4638 sequence of insns. */
4641 init_insn_group_barriers ()
4643 memset (rws_sum, 0, sizeof (rws_sum));
4644 first_instruction = 1;
4647 /* Given the current state, recorded by previous calls to this function,
4648 determine whether a group barrier (a stop bit) is necessary before INSN.
4649 Return nonzero if so. */
4652 group_barrier_needed_p (insn)
4656 int need_barrier = 0;
4657 struct reg_flags flags;
4659 memset (&flags, 0, sizeof (flags));
4660 switch (GET_CODE (insn))
4666 /* A barrier doesn't imply an instruction group boundary. */
4670 memset (rws_insn, 0, sizeof (rws_insn));
/* CALL_INSN: treat as a branch; sibcalls are flagged so the CFM write
   is suppressed in rtx_needs_barrier.  */
4674 flags.is_branch = 1;
4675 flags.is_sibcall = SIBLING_CALL_P (insn);
4676 memset (rws_insn, 0, sizeof (rws_insn));
4678 /* Don't bundle a call following another call. */
4679 if ((pat = prev_active_insn (insn))
4680 && GET_CODE (pat) == CALL_INSN)
4686 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4690 flags.is_branch = 1;
4692 /* Don't bundle a jump following a call. */
4693 if ((pat = prev_active_insn (insn))
4694 && GET_CODE (pat) == CALL_INSN)
4702 if (GET_CODE (PATTERN (insn)) == USE
4703 || GET_CODE (PATTERN (insn)) == CLOBBER)
4704 /* Don't care about USE and CLOBBER "insns"---those are used to
4705 indicate to the optimizer that it shouldn't get rid of
4706 certain operations. */
4709 pat = PATTERN (insn);
4711 /* Ug. Hack hacks hacked elsewhere. */
4712 switch (recog_memoized (insn))
4714 /* We play dependency tricks with the epilogue in order
4715 to get proper schedules. Undo this for dv analysis. */
4716 case CODE_FOR_epilogue_deallocate_stack:
4717 pat = XVECEXP (pat, 0, 0);
4720 /* The pattern we use for br.cloop confuses the code above.
4721 The second element of the vector is representative. */
4722 case CODE_FOR_doloop_end_internal:
4723 pat = XVECEXP (pat, 0, 1);
4726 /* Doesn't generate code. */
4727 case CODE_FOR_pred_rel_mutex:
4734 memset (rws_insn, 0, sizeof (rws_insn));
4735 need_barrier = rtx_needs_barrier (pat, flags, 0);
4737 /* Check to see if the previous instruction was a volatile
4740 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
/* Immediately after a stop bit no second barrier is needed; clear the
   flag once the first real insn has been seen.  */
4747 if (first_instruction)
4750 first_instruction = 0;
4753 return need_barrier;
4756 /* Like group_barrier_needed_p, but do not clobber the current state. */
4759 safe_group_barrier_needed_p (insn)
4762 struct reg_write_state rws_saved[NUM_REGS];
4763 int saved_first_instruction;
/* Snapshot the cumulative group state, run the (state-mutating) query,
   then restore the snapshot so callers see no side effects.  */
4766 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4767 saved_first_instruction = first_instruction;
4769 t = group_barrier_needed_p (insn);
4771 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4772 first_instruction = saved_first_instruction;
4777 /* INSNS is an chain of instructions. Scan the chain, and insert stop bits
4778 as necessary to eliminate dependendencies. This function assumes that
4779 a final instruction scheduling pass has been run which has already
4780 inserted most of the necessary stop bits. This function only inserts
4781 new ones at basic block boundaries, since these are invisible to the
4785 emit_insn_group_barriers (dump, insns)
4791 int insns_since_last_label = 0;
4793 init_insn_group_barriers ();
4795 for (insn = insns; insn; insn = NEXT_INSN (insn))
/* Remember the most recent label/basic-block start so a needed barrier
   can be emitted before it rather than mid-block.  */
4797 if (GET_CODE (insn) == CODE_LABEL)
4799 if (insns_since_last_label)
4801 insns_since_last_label = 0;
4803 else if (GET_CODE (insn) == NOTE
4804 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4806 if (insns_since_last_label)
4808 insns_since_last_label = 0;
/* An explicit insn_group_barrier (unspec_volatile 2) resets tracking.  */
4810 else if (GET_CODE (insn) == INSN
4811 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4812 && XINT (PATTERN (insn), 1) == 2)
4814 init_insn_group_barriers ();
4817 else if (INSN_P (insn))
4819 insns_since_last_label = 1;
4821 if (group_barrier_needed_p (insn))
4826 fprintf (dump, "Emitting stop before label %d\n",
4827 INSN_UID (last_label));
4828 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4831 init_insn_group_barriers ();
4839 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4840 This function has to emit all necessary group barriers. */
4843 emit_all_insn_group_barriers (dump, insns)
4844 FILE *dump ATTRIBUTE_UNUSED;
4849 init_insn_group_barriers ();
4851 for (insn = insns; insn; insn = NEXT_INSN (insn))
/* At a BARRIER, make sure the last active insn is followed by a stop
   bit (skipping a jump-table ADDR_DIFF_VEC), then reset state.  */
4853 if (GET_CODE (insn) == BARRIER)
4855 rtx last = prev_active_insn (insn);
4859 if (GET_CODE (last) == JUMP_INSN
4860 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
4861 last = prev_active_insn (last);
4862 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
4863 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
4865 init_insn_group_barriers ();
4867 else if (INSN_P (insn))
4869 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
4870 init_insn_group_barriers ();
4871 else if (group_barrier_needed_p (insn))
4873 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4874 init_insn_group_barriers ();
/* Re-run on INSN so the fresh group state records this insn's
   accesses after the newly inserted barrier.  */
4875 group_barrier_needed_p (insn);
/* Forward declarations for the Itanium A/B-step erratum workaround.  */
4881 static int errata_find_address_regs PARAMS ((rtx *, void *));
4882 static void errata_emit_nops PARAMS ((rtx));
4883 static void fixup_errata PARAMS ((void));
4885 /* This structure is used to track some details about the previous insns
4886 groups so we can determine if it may be necessary to insert NOPs to
4887 workaround hardware errata. */
4890 HARD_REG_SET p_reg_set;
4891 HARD_REG_SET gr_reg_conditionally_set;
4894 /* Index into the last_group array. */
4895 static int group_idx;
4897 /* Called through for_each_rtx; determines if a hard register that was
4898 conditionally set in the previous group is used as an address register.
4899 It ensures that for_each_rtx returns 1 in that case. */
4901 errata_find_address_regs (xp, data)
4903 void *data ATTRIBUTE_UNUSED;
4906 if (GET_CODE (x) != MEM)
/* Strip POST_MODIFY to reach the base-register rtx of the address
   (reassignment line elided in this listing).  */
4909 if (GET_CODE (x) == POST_MODIFY)
4911 if (GET_CODE (x) == REG)
4913 struct group *prev_group = last_group + (group_idx ^ 1);
4914 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4922 /* Called for each insn; this function keeps track of the state in
4923 last_group and emits additional NOPs if necessary to work around
4924 an Itanium A/B step erratum. */
4926 errata_emit_nops (insn)
4929 struct group *this_group = last_group + group_idx;
4930 struct group *prev_group = last_group + (group_idx ^ 1);
4931 rtx pat = PATTERN (insn);
4932 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4933 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4934 enum attr_type type;
/* Patterns with no machine-code effect are irrelevant to the erratum.  */
4937 if (GET_CODE (real_pat) == USE
4938 || GET_CODE (real_pat) == CLOBBER
4939 || GET_CODE (real_pat) == ASM_INPUT
4940 || GET_CODE (real_pat) == ADDR_VEC
4941 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4942 || asm_noperands (PATTERN (insn)) >= 0)
4945 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
/* Pull the single interesting SET out of a PARALLEL, if there is exactly
   one besides USE/CLOBBER elements.  */
4948 if (GET_CODE (set) == PARALLEL)
4951 set = XVECEXP (real_pat, 0, 0);
4952 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4953 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4954 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4961 if (set && GET_CODE (set) != SET)
4964 type = get_attr_type (insn);
/* Record predicate registers written in this group.  */
4967 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4968 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
/* Record general registers conditionally set by an M/A-type insn whose
   predicate was itself set in the previous group — the pattern the
   erratum cares about.  */
4970 if ((type == TYPE_M || type == TYPE_A) && cond && set
4971 && REG_P (SET_DEST (set))
4972 && GET_CODE (SET_SRC (set)) != PLUS
4973 && GET_CODE (SET_SRC (set)) != MINUS
4974 && (GET_CODE (SET_SRC (set)) != ASHIFT
4975 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4976 && (GET_CODE (SET_SRC (set)) != MEM
4977 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4978 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4980 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4981 || ! REG_P (XEXP (cond, 0)))
4984 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4985 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
/* If such a register is used as an address here, separate the offending
   insns with stop-bit / nop / stop-bit and restart tracking.  */
4987 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4989 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4990 emit_insn_before (gen_nop (), insn);
4991 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4993 memset (last_group, 0, sizeof last_group);
4997 /* Emit extra nops if they are required to work around hardware errata. */
/* Only the B-step parts need this workaround.  */
5004 if (! TARGET_B_STEP)
5008 memset (last_group, 0, sizeof last_group);
5010 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
/* An S-type insn presumably marks a group boundary here: flip group_idx
   and clear the new current-group slot (flip line elided in listing).  */
5015 if (ia64_safe_type (insn) == TYPE_S)
5018 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5021 errata_emit_nops (insn);
5025 /* Instruction scheduling support. */
5026 /* Describe one bundle. */
5030 /* Zero if there's no possibility of a stop in this bundle other than
5031 at the end, otherwise the position of the optional stop bit. */
5033 /* The types of the three slots. */
5034 enum attr_type t[3];
5035 /* The pseudo op to be emitted into the assembler output. */
5039 #define NR_BUNDLES 10
5041 /* A list of all available bundles. */
/* The first initializer of each entry is the possible_stop position
   described above (e.g. .mii can stop after slot 2, .mmi after slot 1);
   the three types give the slot contents, the string the template
   pseudo-op emitted into the assembly output.  */
5043 static const struct bundle bundle[NR_BUNDLES] =
5045 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5046 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5047 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5048 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5049 #if NR_BUNDLES == 10
5050 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5051 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5053 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5054 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5055 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5056 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5057 it matches an L type insn. Otherwise we'll try to generate L type
5059 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5062 /* Describe a packet of instructions. Packets consist of two bundles that
5063 are visible to the hardware in one scheduling window. */
/* The two bundles making up this packet.  */
5067 const struct bundle *t1, *t2;
5068 /* Precomputed value of the first split issue in this packet if a cycle
5069 starts at its beginning. */
5071 /* For convenience, the insn types are replicated here so we don't have
5072 to go through T1 and T2 all the time. */
5073 enum attr_type t[6];
5076 /* An array containing all possible packets. */
5077 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5078 static struct ia64_packet packets[NR_PACKETS];
5080 /* Map attr_type to a string with the name. */
/* NOTE(review): indexed by enum attr_type; order must match the enum
   declared in the generated insn-attr.h -- verify when that enum changes.  */
5082 static const char *const type_names[] =
5084 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5087 /* Nonzero if we should insert stop bits into the schedule. */
5088 int ia64_final_schedule = 0;
/* Forward declarations for the static scheduling helpers defined below.  */
5090 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5091 static rtx ia64_single_set PARAMS ((rtx));
5092 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5093 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5094 static void maybe_rotate PARAMS ((FILE *));
5095 static void finish_last_head PARAMS ((FILE *, int));
5096 static void rotate_one_bundle PARAMS ((FILE *));
5097 static void rotate_two_bundles PARAMS ((FILE *));
5098 static void nop_cycles_until PARAMS ((int, FILE *));
5099 static void cycle_end_fill_slots PARAMS ((FILE *));
5100 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5101 static int get_split PARAMS ((const struct ia64_packet *, int));
5102 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5103 const struct ia64_packet *, int));
5104 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5105 rtx *, enum attr_type *, int));
5106 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5107 static void dump_current_packet PARAMS ((FILE *));
5108 static void schedule_stop PARAMS ((FILE *));
5109 static rtx gen_nop_type PARAMS ((enum attr_type));
5110 static void ia64_emit_nops PARAMS ((void));
5112 /* Map a bundle number to its pseudo-op. */
/* Simple accessor into the bundle table above: B indexes bundle[] and the
   template string (e.g. ".mii") is returned.  */
5118 return bundle[b].name;
5121 /* Compute the slot which will cause a split issue in packet P if the
5122 current cycle begins at slot BEGIN. */
5125 itanium_split_issue (p, begin)
5126 const struct ia64_packet *p;
/* Counts how many insns of each type have been seen so far; sized by
   TYPE_S, which is assumed to be the last relevant attr_type value --
   TODO(review): confirm against the generated enum.  */
5129 int type_count[TYPE_S];
5135 /* Always split before and after MMF. */
5136 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5138 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5140 /* Always split after MBB and BBB. */
5141 if (p->t[1] == TYPE_B)
5143 /* Split after first bundle in MIB BBB combination. */
5144 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5148 memset (type_count, 0, sizeof type_count);
5149 for (i = begin; i < split; i++)
5151 enum attr_type t0 = p->t[i];
5152 /* An MLX bundle reserves the same units as an MFI bundle. */
5153 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5154 : t0 == TYPE_X ? TYPE_I
5157 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5158 2 integer per cycle. */
5159 int max = (t == TYPE_B ? 3 : 2);
5160 if (type_count[t] == max)
5168 /* Return the maximum number of instructions a cpu can issue. */
5176 /* Helper function - like single_set, but look inside COND_EXEC. */
5179 ia64_single_set (insn)
5182 rtx x = PATTERN (insn);
/* Strip a COND_EXEC wrapper so predicated insns expose their SET.  */
5183 if (GET_CODE (x) == COND_EXEC)
5184 x = COND_EXEC_CODE (x);
5185 if (GET_CODE (x) == SET)
/* Non-trivial patterns (e.g. PARALLELs) are handled by single_set_2.  */
5187 return single_set_2 (insn, x);
5190 /* Adjust the cost of a scheduling dependency. Return the new cost of
5191 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5194 ia64_adjust_cost (insn, link, dep_insn, cost)
5195 rtx insn, link, dep_insn;
5198 enum attr_type dep_type;
5199 enum attr_itanium_class dep_class;
5200 enum attr_itanium_class insn_class;
5201 rtx dep_set, set, src, addr;
/* USEs/CLOBBERs, calls and stop insns keep the default cost.  */
5203 if (GET_CODE (PATTERN (insn)) == CLOBBER
5204 || GET_CODE (PATTERN (insn)) == USE
5205 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5206 || GET_CODE (PATTERN (dep_insn)) == USE
5207 /* @@@ Not accurate for indirect calls. */
5208 || GET_CODE (insn) == CALL_INSN
5209 || ia64_safe_type (insn) == TYPE_S)
/* Only true (data) dependencies get a special cost below.  */
5212 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5213 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5216 dep_type = ia64_safe_type (dep_insn);
5217 dep_class = ia64_safe_itanium_class (dep_insn);
5218 insn_class = ia64_safe_itanium_class (insn);
5220 /* Compares that feed a conditional branch can execute in the same
5222 dep_set = ia64_single_set (dep_insn);
5223 set = ia64_single_set (insn);
5225 if (dep_type != TYPE_F
5227 && GET_CODE (SET_DEST (dep_set)) == REG
5228 && PR_REG (REGNO (SET_DEST (dep_set)))
5229 && GET_CODE (insn) == JUMP_INSN)
5232 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5234 /* ??? Can't find any information in the documentation about whether
5238 splits issue. Assume it doesn't. */
/* Extract the memory address (if any) that INSN reads or writes, so we
   can detect address-dependent latencies below.  */
5242 src = set ? SET_SRC (set) : 0;
5244 if (set && GET_CODE (SET_DEST (set)) == MEM)
5245 addr = XEXP (SET_DEST (set), 0);
5246 else if (set && GET_CODE (src) == MEM)
5247 addr = XEXP (src, 0);
5248 else if (set && GET_CODE (src) == ZERO_EXTEND
5249 && GET_CODE (XEXP (src, 0)) == MEM)
5250 addr = XEXP (XEXP (src, 0), 0);
5251 else if (set && GET_CODE (src) == UNSPEC
5252 && XVECLEN (XEXP (src, 0), 0) > 0
5253 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5254 addr = XEXP (XVECEXP (src, 0, 0), 0);
5255 if (addr && GET_CODE (addr) == POST_MODIFY)
5256 addr = XEXP (addr, 0);
5258 set = ia64_single_set (dep_insn);
/* ALU/load results feeding a memory address have extra latency.  */
5260 if ((dep_class == ITANIUM_CLASS_IALU
5261 || dep_class == ITANIUM_CLASS_ILOG
5262 || dep_class == ITANIUM_CLASS_LD)
5263 && (insn_class == ITANIUM_CLASS_LD
5264 || insn_class == ITANIUM_CLASS_ST))
5266 if (! addr || ! set)
5268 /* This isn't completely correct - an IALU that feeds an address has
5269 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5270 otherwise. Unfortunately there's no good way to describe this. */
5271 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
/* The remaining cases adjust cost for specific Itanium functional-unit
   class pairs (integer->multimedia, FMAC->FP-misc, MM->integer, etc.);
   the exact cycle counts live on the elided return statements.  */
5274 if ((dep_class == ITANIUM_CLASS_IALU
5275 || dep_class == ITANIUM_CLASS_ILOG
5276 || dep_class == ITANIUM_CLASS_LD)
5277 && (insn_class == ITANIUM_CLASS_MMMUL
5278 || insn_class == ITANIUM_CLASS_MMSHF
5279 || insn_class == ITANIUM_CLASS_MMSHFI))
5281 if (dep_class == ITANIUM_CLASS_FMAC
5282 && (insn_class == ITANIUM_CLASS_FMISC
5283 || insn_class == ITANIUM_CLASS_FCVTFX
5284 || insn_class == ITANIUM_CLASS_XMPY))
5286 if ((dep_class == ITANIUM_CLASS_FMAC
5287 || dep_class == ITANIUM_CLASS_FMISC
5288 || dep_class == ITANIUM_CLASS_FCVTFX
5289 || dep_class == ITANIUM_CLASS_XMPY)
5290 && insn_class == ITANIUM_CLASS_STF)
5292 if ((dep_class == ITANIUM_CLASS_MMMUL
5293 || dep_class == ITANIUM_CLASS_MMSHF
5294 || dep_class == ITANIUM_CLASS_MMSHFI)
5295 && (insn_class == ITANIUM_CLASS_LD
5296 || insn_class == ITANIUM_CLASS_ST
5297 || insn_class == ITANIUM_CLASS_IALU
5298 || insn_class == ITANIUM_CLASS_ILOG
5299 || insn_class == ITANIUM_CLASS_ISHF))
5305 /* Describe the current state of the Itanium pipeline. */
5308 /* The first slot that is used in the current cycle. */
5310 /* The next slot to fill. */
5312 /* The packet we have selected for the current issue window. */
5313 const struct ia64_packet *packet;
5314 /* The position of the split issue that occurs due to issue width
5315 limitations (6 if there's no split issue). */
5317 /* Record data about the insns scheduled so far in the same issue
5318 window. The elements up to but not including FIRST_SLOT belong
5319 to the previous cycle, the ones starting with FIRST_SLOT belong
5320 to the current cycle. */
5321 enum attr_type types[6];
5324 /* Nonzero if we decided to schedule a stop bit. */
5328 /* Temporary arrays; they have enough elements to hold all insns that
5329 can be ready at the same time while scheduling of the current block.
5330 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
/* Allocated in ia64_sched_init, sized by the scheduler's max_ready.  */
5331 static rtx *sched_ready;
5332 static enum attr_type *sched_types;
5334 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5338 insn_matches_slot (p, itype, slot, insn)
5339 const struct ia64_packet *p;
5340 enum attr_type itype;
5344 enum attr_itanium_requires_unit0 u0;
5345 enum attr_type stype = p->t[slot];
/* Insns that must issue on unit 0 can only go in the first slot of
   their type within the window; check earlier slots of the same type.  */
5349 u0 = ia64_safe_itanium_requires_unit0 (insn);
5350 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5353 for (i = sched_data.first_slot; i < slot; i++)
5354 if (p->t[i] == stype
5355 || (stype == TYPE_F && p->t[i] == TYPE_L)
5356 || (stype == TYPE_I && p->t[i] == TYPE_X))
5359 if (GET_CODE (insn) == CALL_INSN)
5361 /* Reject calls in multiway branch packets. We want to limit
5362 the number of multiway branches we generate (since the branch
5363 predictor is limited), and this seems to work fairly well.
5364 (If we didn't do this, we'd have to add another test here to
5365 force calls into the third slot of the bundle.) */
5368 if (p->t[1] == TYPE_B)
5373 if (p->t[4] == TYPE_B)
/* TYPE_A insns can issue in either an M or an I slot.  */
5381 if (itype == TYPE_A)
5382 return stype == TYPE_M || stype == TYPE_I;
5386 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5387 assembly output a bit prettier. */
5390 ia64_emit_insn_before (insn, before)
5393 rtx prev = PREV_INSN (before);
/* UNSPEC 23 is presumably the cycle_display marker (per the comment
   above) -- verify against the unspec numbering in ia64.md.  */
5394 if (prev && GET_CODE (prev) == INSN
5395 && GET_CODE (PATTERN (prev)) == UNSPEC
5396 && XINT (PATTERN (prev), 1) == 23)
5398 emit_insn_before (insn, before);
5402 /* Generate a nop insn of the given type. Note we never generate L type
/* Dispatch (in the elided switch) on the attr_type to the matching
   nop pattern from the machine description.  */
5412 return gen_nop_m ();
5414 return gen_nop_i ();
5416 return gen_nop_b ();
5418 return gen_nop_f ();
5420 return gen_nop_x ();
5427 /* When rotating a bundle out of the issue window, insert a bundle selector
5428 insn in front of it. DUMP is the scheduling dump file or NULL. START
5429 is either 0 or 3, depending on whether we want to emit a bundle selector
5430 for the first bundle or the second bundle in the current issue window.
5432 The selector insns are emitted this late because the selected packet can
5433 be changed until parts of it get rotated out. */
5436 finish_last_head (dump, start)
5440 const struct ia64_packet *p = sched_data.packet;
5441 const struct bundle *b = start == 0 ? p->t1 : p->t2;
/* Index of B within the global bundle[] table; encoded into the
   bundle_selector insn below.  */
5442 int bundle_type = b - bundle;
5446 if (! ia64_final_schedule)
/* Find the first real insn of this bundle; empty slots hold 0.  */
5449 for (i = start; sched_data.insns[i] == 0; i++)
5452 insn = sched_data.insns[i];
5455 fprintf (dump, "// Emitting template before %d: %s\n",
5456 INSN_UID (insn), b->name);
5458 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5461 /* We can't schedule more insns this cycle. Fix up the scheduling state
5462 and advance FIRST_SLOT and CUR.
5463 We have to distribute the insns that are currently found between
5464 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5465 far, they are stored successively in the fields starting at FIRST_SLOT;
5466 now they must be moved to the correct slots.
5467 DUMP is the current scheduling dump file, or NULL. */
5470 cycle_end_fill_slots (dump)
5473 const struct ia64_packet *packet = sched_data.packet;
5475 enum attr_type tmp_types[6];
/* Work on copies; sched_data.types/insns are rewritten in place below.  */
5478 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5479 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5481 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5483 enum attr_type t = tmp_types[i];
5484 if (t != ia64_safe_type (tmp_insns[i]))
/* Advance SLOT past packet slots this insn cannot occupy, marking the
   skipped slots as empty (they will get nops).  */
5486 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5488 if (slot > sched_data.split)
5491 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5493 sched_data.types[slot] = packet->t[slot];
5494 sched_data.insns[slot] = 0;
5495 sched_data.stopbit[slot] = 0;
5497 /* ??? TYPE_L instructions always fill up two slots, but we don't
5498 support TYPE_L nops. */
5499 if (packet->t[slot] == TYPE_L)
5504 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5505 actual slot type later. */
5506 sched_data.types[slot] = packet->t[slot];
5507 sched_data.insns[slot] = tmp_insns[i];
5508 sched_data.stopbit[slot] = 0;
5510 /* TYPE_L instructions always fill up two slots. */
5515 /* This isn't right - there's no need to pad out until the forced split;
5516 the CPU will automatically split if an insn isn't ready. */
5518 while (slot < sched_data.split)
5520 sched_data.types[slot] = packet->t[slot];
5521 sched_data.insns[slot] = 0;
5522 sched_data.stopbit[slot] = 0;
5527 sched_data.first_slot = sched_data.cur = slot;
5530 /* Bundle rotations, as described in the Itanium optimization manual.
5531 We can rotate either one or both bundles out of the issue window.
5532 DUMP is the current scheduling dump file, or NULL. */
5535 rotate_one_bundle (dump)
5539 fprintf (dump, "// Rotating one bundle.\n");
/* Emit the bundle selector for the bundle being rotated out.  */
5541 finish_last_head (dump, 0);
5542 if (sched_data.cur > 3)
/* Shift the second bundle's state down into slots 0-2.  */
5544 sched_data.cur -= 3;
5545 sched_data.first_slot -= 3;
5546 memmove (sched_data.types,
5547 sched_data.types + 3,
5548 sched_data.cur * sizeof *sched_data.types);
5549 memmove (sched_data.stopbit,
5550 sched_data.stopbit + 3,
5551 sched_data.cur * sizeof *sched_data.stopbit);
5552 memmove (sched_data.insns,
5553 sched_data.insns + 3,
5554 sched_data.cur * sizeof *sched_data.insns);
5559 sched_data.first_slot = 0;
/* Rotate both bundles out of the issue window, emitting their bundle
   selectors, and reset the window to empty.  DUMP may be NULL.  */
5564 rotate_two_bundles (dump)
5568 fprintf (dump, "// Rotating two bundles.\n");
5570 if (sched_data.cur == 0)
5573 finish_last_head (dump, 0);
/* The second bundle only exists if more than three slots are filled.  */
5574 if (sched_data.cur > 3)
5575 finish_last_head (dump, 3);
5577 sched_data.first_slot = 0;
5580 /* We're beginning a new block. Initialize data structures as necessary. */
5583 ia64_sched_init (dump, sched_verbose, max_ready)
5584 FILE *dump ATTRIBUTE_UNUSED;
5585 int sched_verbose ATTRIBUTE_UNUSED;
/* The packet table is built only once for the whole compilation.  */
5588 static int initialized = 0;
/* Enumerate every ordered pair of bundles into packets[].  */
5596 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5598 const struct bundle *t1 = bundle + b1;
5599 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5601 const struct bundle *t2 = bundle + b2;
/* Replicate slot types and precompute each packet's first split issue.  */
5607 for (i = 0; i < NR_PACKETS; i++)
5610 for (j = 0; j < 3; j++)
5611 packets[i].t[j] = packets[i].t1->t[j];
5612 for (j = 0; j < 3; j++)
5613 packets[i].t[j + 3] = packets[i].t2->t[j];
5614 packets[i].first_split = itanium_split_issue (packets + i, 0);
5619 init_insn_group_barriers ();
5621 memset (&sched_data, 0, sizeof sched_data);
5622 sched_types = (enum attr_type *) xmalloc (max_ready
5623 * sizeof (enum attr_type));
5624 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5627 /* See if the packet P can match the insns we have already scheduled. Return
5628 nonzero if so. In *PSLOT, we store the first slot that is available for
5629 more instructions if we choose this packet.
5630 SPLIT holds the last slot we can use, there's a split issue after it so
5631 scheduling beyond it would cause us to use more than one cycle. */
5634 packet_matches_p (p, split, pslot)
5635 const struct ia64_packet *p;
5639 int filled = sched_data.cur;
5640 int first = sched_data.first_slot;
5643 /* First, check if the first of the two bundles must be a specific one (due
5645 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5647 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
/* Insns from the previous cycle must keep their exact slots.  */
5650 for (i = 0; i < first; i++)
5651 if (! insn_matches_slot (p, sched_data.types[i], i,
5652 sched_data.insns[i]))
/* Insns of the current cycle may move forward to any later matching
   slot before the split.  */
5654 for (i = slot = first; i < filled; i++)
5656 while (slot < split)
5658 if (insn_matches_slot (p, sched_data.types[i], slot,
5659 sched_data.insns[i]))
5673 /* A frontend for itanium_split_issue. For a packet P and a slot
5674 number FIRST that describes the start of the current clock cycle,
5675 return the slot number of the first split issue. This function
5676 uses the cached number found in P if possible. */
5679 get_split (p, first)
5680 const struct ia64_packet *p;
/* The cached value is only valid when the cycle starts at slot 0.  */
5684 return p->first_split;
5685 return itanium_split_issue (p, first);
5688 /* Given N_READY insns in the array READY, whose types are found in the
5689 corresponding array TYPES, return the insn that is best suited to be
5690 scheduled in slot SLOT of packet P. */
5693 find_best_insn (ready, types, n_ready, p, slot)
5695 enum attr_type *types;
5697 const struct ia64_packet *p;
/* Scan from the back of the ready list (highest scheduler priority
   first, per the sort order of the ready list).  */
5702 while (n_ready-- > 0)
5704 rtx insn = ready[n_ready];
5707 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5709 /* If we have equally good insns, one of which has a stricter
5710 slot requirement, prefer the one with the stricter requirement. */
5711 if (best >= 0 && types[n_ready] == TYPE_A)
5713 if (insn_matches_slot (p, types[n_ready], slot, insn))
5716 best_pri = INSN_PRIORITY (ready[best]);
5718 /* If there's no way we could get a stricter requirement, stop
5720 if (types[n_ready] != TYPE_A
5721 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5729 /* Select the best packet to use given the current scheduler state and the
5731 READY is an array holding N_READY ready insns; TYPES is a corresponding
5732 array that holds their types. Store the best packet in *PPACKET and the
5733 number of insns that can be scheduled in the current cycle in *PBEST. */
5736 find_best_packet (pbest, ppacket, ready, types, n_ready)
5738 const struct ia64_packet **ppacket;
5740 enum attr_type *types;
5743 int first = sched_data.first_slot;
5746 const struct ia64_packet *best_packet = NULL;
/* Try every packet; greedily count how many ready insns each one can
   accept before its split issue and keep the best.  */
5749 for (i = 0; i < NR_PACKETS; i++)
5751 const struct ia64_packet *p = packets + i;
5753 int split = get_split (p, first);
5755 int first_slot, last_slot;
5758 if (! packet_matches_p (p, split, &first_slot))
/* Use a scratch copy so consumed insns can be marked with 0.  */
5761 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5765 for (slot = first_slot; slot < split; slot++)
5769 /* Disallow a degenerate case where the first bundle doesn't
5770 contain anything but NOPs! */
5771 if (first_slot == 0 && win == 0 && slot == 3)
5777 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5780 sched_ready[insn_nr] = 0;
5784 else if (p->t[slot] == TYPE_B)
5787 /* We must disallow MBB/BBB packets if any of their B slots would be
5788 filled with nops. */
5791 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5796 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
/* Prefer the packet issuing the most insns; break ties by the one
   whose last used slot comes earliest.  */
5801 || (win == best && last_slot < lowest_end))
5804 lowest_end = last_slot;
5809 *ppacket = best_packet;
5812 /* Reorder the ready list so that the insns that can be issued in this cycle
5813 are found in the correct order at the end of the list.
5814 DUMP is the scheduling dump file, or NULL. READY points to the start,
5815 E_READY to the end of the ready list. MAY_FAIL determines what should be
5816 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5817 otherwise we return 0.
5818 Return 1 if any insns can be scheduled in this cycle. */
5821 itanium_reorder (dump, ready, e_ready, may_fail)
5827 const struct ia64_packet *best_packet;
5828 int n_ready = e_ready - ready;
5829 int first = sched_data.first_slot;
5830 int i, best, best_split, filled;
5832 for (i = 0; i < n_ready; i++)
5833 sched_types[i] = ia64_safe_type (ready[i]);
5835 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5846 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5847 best_packet->t1->name,
5848 best_packet->t2 ? best_packet->t2->name : NULL, best);
5851 best_split = itanium_split_issue (best_packet, first);
5852 packet_matches_p (best_packet, best_split, &filled);
/* Move the insns chosen for each slot to the end of the ready list;
   the scheduler issues from the back of the list.  */
5854 for (i = filled; i < best_split; i++)
5858 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5861 rtx insn = ready[insn_nr];
5862 memmove (ready + insn_nr, ready + insn_nr + 1,
5863 (n_ready - insn_nr - 1) * sizeof (rtx));
5864 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5865 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5866 ready[--n_ready] = insn;
/* Record the selection for use by the issue/variable-issue hooks.  */
5870 sched_data.packet = best_packet;
5871 sched_data.split = best_split;
5875 /* Dump information about the current scheduling state to file DUMP. */
5878 dump_current_packet (dump)
5882 fprintf (dump, "// %d slots filled:", sched_data.cur);
/* Slots before FIRST_SLOT belong to the previous cycle.  */
5883 for (i = 0; i < sched_data.first_slot; i++)
5885 rtx insn = sched_data.insns[i];
5886 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5888 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5889 if (sched_data.stopbit[i])
5890 fprintf (dump, " ;;");
5892 fprintf (dump, " :::");
/* Then the slots filled in the current cycle.  */
5893 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5895 rtx insn = sched_data.insns[i];
5896 enum attr_type t = ia64_safe_type (insn);
5897 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5899 fprintf (dump, "\n");
5902 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5906 schedule_stop (dump)
5909 const struct ia64_packet *best = sched_data.packet;
5914 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5916 if (sched_data.cur == 0)
5919 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5921 rotate_two_bundles (NULL);
/* Search all packets (current one first) for the placement that lets
   the stop bit go as early as possible after the scheduled insns.  */
5925 for (i = -1; i < NR_PACKETS; i++)
5927 /* This is a slight hack to give the current packet the first chance.
5928 This is done to avoid e.g. switching from MIB to MBB bundles. */
5929 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5930 int split = get_split (p, sched_data.first_slot);
5931 const struct bundle *compare;
5934 if (! packet_matches_p (p, split, &next))
/* COMPARE is the bundle in which the stop bit would land.  */
5937 compare = next > 3 ? p->t2 : p->t1;
5940 if (compare->possible_stop)
5941 stoppos = compare->possible_stop;
5945 if (stoppos < next || stoppos >= best_stop)
5947 if (compare->possible_stop == 0)
/* No mid-bundle stop available; stop at the bundle boundary.  */
5949 stoppos = (next > 3 ? 6 : 3);
5951 if (stoppos < next || stoppos >= best_stop)
5955 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5956 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5959 best_stop = stoppos;
/* Commit: fill the slots up to the stop position with empty slots
   (nops) and mark the stop bit.  */
5963 sched_data.packet = best;
5964 cycle_end_fill_slots (dump);
5965 while (sched_data.cur < best_stop)
5967 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5968 sched_data.insns[sched_data.cur] = 0;
5969 sched_data.stopbit[sched_data.cur] = 0;
5972 sched_data.stopbit[sched_data.cur - 1] = 1;
5973 sched_data.first_slot = best_stop;
5976 dump_current_packet (dump);
5979 /* If necessary, perform one or two rotations on the scheduling state.
5980 This should only be called if we are starting a new cycle. */
/* A completely full window (6 slots) rotates both bundles; a full first
   bundle (3-5 slots) rotates just one.  */
5986 if (sched_data.cur == 6)
5987 rotate_two_bundles (dump);
5988 else if (sched_data.cur >= 3)
5989 rotate_one_bundle (dump);
5990 sched_data.first_slot = sched_data.cur;
5993 /* The clock cycle when ia64_sched_reorder was last called. */
5994 static int prev_cycle;
5996 /* The first insn scheduled in the previous cycle. This is the saved
5997 value of sched_data.first_slot. */
5998 static int prev_first;
6000 /* The last insn that has been scheduled. At the start of a new cycle
6001 we know that we can emit new insns after it; the main scheduling code
6002 has already emitted a cycle_display insn after it and is using that
6003 as its current last insn. */
6004 static rtx last_issued;
6006 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6007 pad out the delay between MM (shifts, etc.) and integer operations. */
6010 nop_cycles_until (clock_var, dump)
6014 int prev_clock = prev_cycle;
6015 int cycles_left = clock_var - prev_clock;
6017 /* Finish the previous cycle; pad it out with NOPs. */
6018 if (sched_data.cur == 3)
/* Exactly one full bundle: just close the group and rotate.  */
6020 rtx t = gen_insn_group_barrier (GEN_INT (3));
6021 last_issued = emit_insn_after (t, last_issued);
6022 maybe_rotate (dump);
6024 else if (sched_data.cur > 0)
6027 int split = itanium_split_issue (sched_data.packet, prev_first);
6029 if (sched_data.cur < 3 && split > 3)
/* Pad with nops up to the split issue point of the current packet.  */
6035 if (split > sched_data.cur)
6038 for (i = sched_data.cur; i < split; i++)
6042 t = gen_nop_type (sched_data.packet->t[i]);
6043 last_issued = emit_insn_after (t, last_issued);
6044 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6045 sched_data.insns[i] = last_issued;
6046 sched_data.stopbit[i] = 0;
6048 sched_data.cur = split;
/* If no stop is needed yet, pad the rest of the window too.  */
6051 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6055 for (i = sched_data.cur; i < 6; i++)
6059 t = gen_nop_type (sched_data.packet->t[i]);
6060 last_issued = emit_insn_after (t, last_issued);
6061 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
6062 sched_data.insns[i] = last_issued;
6063 sched_data.stopbit[i] = 0;
6070 if (need_stop || sched_data.cur == 6)
6072 rtx t = gen_insn_group_barrier (GEN_INT (3));
6073 last_issued = emit_insn_after (t, last_issued);
6075 maybe_rotate (dump);
/* Burn whole idle cycles with MII bundles of nops, each closed by a
   group barrier.  */
6079 while (cycles_left > 0)
6081 rtx t = gen_bundle_selector (GEN_INT (0));
6082 last_issued = emit_insn_after (t, last_issued);
6083 t = gen_nop_type (TYPE_M);
6084 last_issued = emit_insn_after (t, last_issued);
6085 t = gen_nop_type (TYPE_I);
6086 last_issued = emit_insn_after (t, last_issued);
6087 if (cycles_left > 1)
6089 t = gen_insn_group_barrier (GEN_INT (2));
6090 last_issued = emit_insn_after (t, last_issued);
6093 t = gen_nop_type (TYPE_I);
6094 last_issued = emit_insn_after (t, last_issued);
6095 t = gen_insn_group_barrier (GEN_INT (3));
6096 last_issued = emit_insn_after (t, last_issued);
6101 /* We are about to begin issuing insns for this clock cycle.
6102 Override the default sort algorithm to better slot instructions. */
6105 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6106 reorder_type, clock_var)
6107 FILE *dump ATTRIBUTE_UNUSED;
6108 int sched_verbose ATTRIBUTE_UNUSED;
6111 int reorder_type, clock_var;
6114 int n_ready = *pn_ready;
6115 rtx *e_ready = ready + n_ready;
6120 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6121 dump_current_packet (dump);
/* Work around the MM-to-integer latency: if an integer-class insn in
   the ready list depends on a multimedia shift/multiply, pad with nop
   cycles (see nop_cycles_until) before issuing.  */
6124 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6126 for (insnp = ready; insnp < e_ready; insnp++)
6129 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6130 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6131 || t == ITANIUM_CLASS_ILOG
6132 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6135 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6136 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6137 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6139 rtx other = XEXP (link, 0);
6140 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6141 if (t0 == ITANIUM_CLASS_MMSHF
6142 || t0 == ITANIUM_CLASS_MMMUL)
6144 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6153 prev_first = sched_data.first_slot;
6154 prev_cycle = clock_var;
6156 if (reorder_type == 0)
6157 maybe_rotate (sched_verbose ? dump : NULL);
6159 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6161 for (insnp = ready; insnp < e_ready; insnp++)
6162 if (insnp < e_ready)
6165 enum attr_type t = ia64_safe_type (insn);
6166 if (t == TYPE_UNKNOWN)
/* Asms sink to the front of the list; other unknowns to the back.  */
6168 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6169 || asm_noperands (PATTERN (insn)) >= 0)
6171 rtx lowest = ready[n_asms];
6172 ready[n_asms] = insn;
6178 rtx highest = ready[n_ready - 1];
6179 ready[n_ready - 1] = insn;
6181 if (ia64_final_schedule && group_barrier_needed_p (insn))
6183 schedule_stop (sched_verbose ? dump : NULL);
6184 sched_data.last_was_stop = 1;
6185 maybe_rotate (sched_verbose ? dump : NULL);
6192 if (n_asms < n_ready)
6194 /* Some normal insns to process. Skip the asms. */
6198 else if (n_ready > 0)
6200 /* Only asm insns left. */
6201 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6203 schedule_stop (sched_verbose ? dump : NULL);
6204 sched_data.last_was_stop = 1;
6205 maybe_rotate (sched_verbose ? dump : NULL);
6207 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6211 if (ia64_final_schedule)
6213 int nr_need_stop = 0;
6215 for (insnp = ready; insnp < e_ready; insnp++)
6216 if (safe_group_barrier_needed_p (*insnp))
6219 /* Schedule a stop bit if
6220 - all insns require a stop bit, or
6221 - we are starting a new cycle and _any_ insns require a stop bit.
6222 The reason for the latter is that if our schedule is accurate, then
6223 the additional stop won't decrease performance at this point (since
6224 there's a split issue at this point anyway), but it gives us more
6225 freedom when scheduling the currently ready insns. */
6226 if ((reorder_type == 0 && nr_need_stop)
6227 || (reorder_type == 1 && n_ready == nr_need_stop))
6229 schedule_stop (sched_verbose ? dump : NULL);
6230 sched_data.last_was_stop = 1;
6231 maybe_rotate (sched_verbose ? dump : NULL);
6232 if (reorder_type == 1)
6239 /* Move down everything that needs a stop bit, preserving relative
6241 while (insnp-- > ready + deleted)
6242 while (insnp >= ready + deleted)
6245 if (! safe_group_barrier_needed_p (insn))
6247 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6253 if (deleted != nr_need_stop)
6258 return itanium_reorder (sched_verbose ? dump : NULL,
6259 ready, e_ready, reorder_type == 1);
/* TARGET_SCHED_REORDER entry point: delegate to the internal worker with
   reorder_type 0 (start of a new cycle).  */
6263 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6270 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6271 pn_ready, 0, clock_var);
6274 /* Like ia64_sched_reorder, but called after issuing each insn.
6275 Override the default sort algorithm to better slot instructions. */
6278 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6279 FILE *dump ATTRIBUTE_UNUSED;
6280 int sched_verbose ATTRIBUTE_UNUSED;
6285 if (sched_data.last_was_stop)
6288 /* Detect one special case and try to optimize it.
6289 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6290 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6291 if (sched_data.first_slot == 1
6292 && sched_data.stopbit[0]
6293 && ((sched_data.cur == 4
6294 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6295 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6296 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6297 || (sched_data.cur == 3
6298 && (sched_data.types[1] == TYPE_M
6299 || sched_data.types[1] == TYPE_A)
6300 && (sched_data.types[2] != TYPE_M
6301 && sched_data.types[2] != TYPE_I
6302 && sched_data.types[2] != TYPE_A))))
6306 rtx stop = sched_data.insns[1];
6308 /* Search backward for the stop bit that must be there. */
6313 stop = PREV_INSN (stop);
6314 if (GET_CODE (stop) != INSN)
6316 insn_code = recog_memoized (stop);
6318 /* Ignore cycle displays and .pred.rel.mutex. */
6319 if (insn_code == CODE_FOR_cycle_display
6320 || insn_code == CODE_FOR_pred_rel_mutex)
6323 if (insn_code == CODE_FOR_insn_group_barrier)
6328 /* Adjust the stop bit's slot selector. */
6329 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6331 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
/* Shift the second cycle's insns from slots 1-3 into slots 3-5 so the
   stop now falls at the end of the first bundle.  */
6333 sched_data.stopbit[0] = 0;
6334 sched_data.stopbit[2] = 1;
6336 sched_data.types[5] = sched_data.types[3];
6337 sched_data.types[4] = sched_data.types[2];
6338 sched_data.types[3] = sched_data.types[1];
6339 sched_data.insns[5] = sched_data.insns[3];
6340 sched_data.insns[4] = sched_data.insns[2];
6341 sched_data.insns[3] = sched_data.insns[1];
6342 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6343 sched_data.cur += 2;
6344 sched_data.first_slot = 3;
/* Find the MFB packet so the freed slots become F and B nops.  */
6345 for (i = 0; i < NR_PACKETS; i++)
6347 const struct ia64_packet *p = packets + i;
6348 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6350 sched_data.packet = p;
6354 rotate_one_bundle (sched_verbose ? dump : NULL);
/* Otherwise pick the packet that matches the scheduled insns with the
   fewest wasted slots; multiway-branch packets are excluded.  */
6357 for (i = 0; i < NR_PACKETS; i++)
6359 const struct ia64_packet *p = packets + i;
6360 int split = get_split (p, sched_data.first_slot);
6363 /* Disallow multiway branches here. */
6364 if (p->t[1] == TYPE_B)
6367 if (packet_matches_p (p, split, &next) && next < best)
6370 sched_data.packet = p;
6371 sched_data.split = split;
6380 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6385 /* Did we schedule a stop? If so, finish this cycle. */
6386 if (sched_data.cur == sched_data.first_slot)
6391 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6393 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6395 dump_current_packet (dump);
6399 /* We are about to issue INSN. Return the number of insns left on the
6400 ready queue that can be issued this cycle. */
/* TARGET_SCHED_VARIABLE_ISSUE hook.  Records INSN into the current
   bundle state (sched_data) and emits group barriers where a stop bit
   was decided on the previous issue.  NOTE(review): interior lines are
   elided in this extract.  */
6403 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6407 int can_issue_more ATTRIBUTE_UNUSED;
6409 enum attr_type t = ia64_safe_type (insn);
6413 if (sched_data.last_was_stop)
6415 int t = sched_data.first_slot;
/* Materialize the stop decided last time as an insn_group_barrier
   just before INSN, then reset barrier-tracking state.  */
6418 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6419 init_insn_group_barriers ();
6420 sched_data.last_was_stop = 0;
6423 if (t == TYPE_UNKNOWN)
6426 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6427 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6428 || asm_noperands (PATTERN (insn)) >= 0)
6430 /* This must be some kind of asm. Clear the scheduling state. */
6431 rotate_two_bundles (sched_verbose ? dump : NULL);
6432 if (ia64_final_schedule)
6433 group_barrier_needed_p (insn);
6438 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6439 important state info. Don't delete this test. */
6440 if (ia64_final_schedule
6441 && group_barrier_needed_p (insn))
/* Record INSN in the current slot of the in-progress bundle pair.  */
6444 sched_data.stopbit[sched_data.cur] = 0;
6445 sched_data.insns[sched_data.cur] = insn;
6446 sched_data.types[sched_data.cur] = t;
6450 fprintf (dump, "// Scheduling insn %d of type %s\n",
6451 INSN_UID (insn), type_names[t]);
/* A call ends the group in the final schedule: arrange a stop bit.  */
6453 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6455 schedule_stop (sched_verbose ? dump : NULL);
6456 sched_data.last_was_stop = 1;
6462 /* Free data allocated by ia64_sched_init. */
/* TARGET_SCHED_FINISH hook: flush any partially-filled bundles at the
   end of scheduling a region.  */
6465 ia64_sched_finish (dump, sched_verbose)
6470 fprintf (dump, "// Finishing schedule.\n");
6471 rotate_two_bundles (NULL);
/* Emit a cycle_display marker insn after LAST showing CLOCK, but only
   during the final scheduling pass (markers are stripped later).  */
6477 ia64_cycle_display (clock, last)
6481 if (ia64_final_schedule)
6482 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6487 /* Emit pseudo-ops for the assembler to describe predicate relations.
6488 At present this assumes that we only consider predicate pairs to
6489 be mutex, and that the assembler can deduce proper values from
6490 straight-line code. */
6493 emit_predicate_relation_info ()
/* Walk basic blocks in reverse; at each code label, emit a
   .pred.rel.mutex for every predicate-register pair live on entry.  */
6497 for (i = n_basic_blocks - 1; i >= 0; --i)
6499 basic_block bb = BASIC_BLOCK (i);
6501 rtx head = bb->head;
6503 /* We only need such notes at code labels. */
6504 if (GET_CODE (head) != CODE_LABEL)
/* Skip past the basic-block note so the mutex insn lands inside BB.  */
6506 if (GET_CODE (NEXT_INSN (head)) == NOTE
6507 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6508 head = NEXT_INSN (head);
/* Predicate registers come in even/odd mutex pairs, hence r += 2.  */
6510 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6511 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6513 rtx p = gen_rtx_REG (BImode, r);
6514 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6515 if (head == bb->end)
6521 /* Look for conditional calls that do not return, and protect predicate
6522 relations around them. Otherwise the assembler will assume the call
6523 returns, and complain about uses of call-clobbered predicates after
6525 for (i = n_basic_blocks - 1; i >= 0; --i)
6527 basic_block bb = BASIC_BLOCK (i);
6528 rtx insn = bb->head;
6532 if (GET_CODE (insn) == CALL_INSN
6533 && GET_CODE (PATTERN (insn)) == COND_EXEC
6534 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
/* Bracket the noreturn conditional call with safe_across_calls
   directives, keeping bb->head/bb->end pointers up to date.  */
6536 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6537 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6538 if (bb->head == insn)
6540 if (bb->end == insn)
6544 if (insn == bb->end)
6546 insn = NEXT_INSN (insn);
6551 /* Generate a NOP instruction of type T. We will never generate L type
/* Returns the nop insn pattern matching execution-unit type T
   (M/I/B/F/X).  NOTE(review): the switch scaffolding and function
   definition line are elided from this extract.  */
6561 return gen_nop_m ();
6563 return gen_nop_i ();
6565 return gen_nop_b ();
6567 return gen_nop_f ();
6569 return gen_nop_x ();
6575 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6576 here than while scheduling. */
/* NOTE(review): the function definition line is elided; this walks the
   whole insn chain, tracking the current bundle template B and slot
   position, and inserts typed NOPs so every bundle slot is filled.  */
6582 const struct bundle *b = 0;
6585 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6589 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6590 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
/* Bundle selector (unspec 22) or label: pad out the previous bundle
   with NOPs, then switch to the newly selected template.  */
6592 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6593 || GET_CODE (insn) == CODE_LABEL)
6596 while (bundle_pos < 3)
6598 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6601 if (GET_CODE (insn) != CODE_LABEL)
6602 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
/* Group barrier (unspec_volatile 2): pad with NOPs up to the slot
   the barrier's operand designates.  */
6608 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6610 int t = INTVAL (XVECEXP (pat, 0, 0));
6612 while (bundle_pos < t)
6614 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6620 if (bundle_pos == 3)
6623 if (b && INSN_P (insn))
6625 t = ia64_safe_type (insn);
/* Inline asm consumes the rest of the bundle; fill it with NOPs.  */
6626 if (asm_noperands (PATTERN (insn)) >= 0
6627 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6629 while (bundle_pos < 3)
6631 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6637 if (t == TYPE_UNKNOWN)
/* Advance to the slot whose unit type accepts this insn (A-type fits
   either an M or an I slot), inserting NOPs for skipped slots.  */
6639 while (bundle_pos < 3)
6641 if (t == b->t[bundle_pos]
6642 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6643 || b->t[bundle_pos] == TYPE_I)))
6646 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6655 /* Perform machine dependent operations on the rtl chain INSNS. */
/* NOTE(review): function definition line elided.  Runs the second
   scheduling pass (with bundling), emits group barriers, guards
   trailing calls for unwinding, and emits predicate relation info.  */
6661 /* If optimizing, we'll have split before scheduling. */
6663 split_all_insns_noflow ();
6665 /* Make sure the CFG and global_live_at_start are correct
6666 for emit_predicate_relation_info. */
6667 find_basic_blocks (insns, max_reg_num (), NULL);
6668 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6670 if (ia64_flag_schedule_insns2)
6672 timevar_push (TV_SCHED2);
6673 ia64_final_schedule = 1;
6674 schedule_ebbs (rtl_dump_file);
6675 ia64_final_schedule = 0;
6676 timevar_pop (TV_SCHED2);
6678 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6679 place as they were during scheduling. */
6680 emit_insn_group_barriers (rtl_dump_file, insns);
6684 emit_all_insn_group_barriers (rtl_dump_file, insns);
6686 /* A call must not be the last instruction in a function, so that the
6687 return address is still within the function, so that unwinding works
6688 properly. Note that IA-64 differs from dwarf2 on this point. */
6689 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6694 insn = get_last_insn ();
6695 if (! INSN_P (insn))
6696 insn = prev_active_insn (insn);
/* Skip a trailing group barrier (unspec_volatile 2) when looking for
   the final real insn.  */
6697 if (GET_CODE (insn) == INSN
6698 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6699 && XINT (PATTERN (insn), 1) == 2)
6702 insn = prev_active_insn (insn);
6704 if (GET_CODE (insn) == CALL_INSN)
/* Append break.f padding after a trailing call so the return address
   stays inside the function for the unwinder.  */
6707 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6708 emit_insn (gen_break_f ());
6709 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6714 emit_predicate_relation_info ();
6717 /* Return true if REGNO is used by the epilogue. */
6720 ia64_epilogue_uses (regno)
/* NOTE(review): the switch scaffolding over REGNO is elided; the
   visible cases are gp (R1), the input registers, b0, and ar.pfs.  */
6726 /* When a function makes a call through a function descriptor, we
6727 will write a (potentially) new value to "gp". After returning
6728 from such a call, we need to make sure the function restores the
6729 original gp-value, even if the function itself does not use the
6731 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6733 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6734 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
6735 /* For functions defined with the syscall_linkage attribute, all
6736 input registers are marked as live at all function exits. This
6737 prevents the register allocator from using the input registers,
6738 which in turn makes it possible to restart a system call after
6739 an interrupt without having to save/restore the input registers.
6740 This also prevents kernel data from leaking to application code. */
6741 return lookup_attribute ("syscall_linkage",
6742 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
6745 /* Conditional return patterns can't represent the use of `b0' as
6746 the return address, so we force the value live this way. */
6750 /* Likewise for ar.pfs, which is used by br.ret. */
6758 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry, as the attribute_spec
   interface requires.  */
6759 const struct attribute_spec ia64_attribute_table[] =
6761 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6762 { "syscall_linkage", 0, 0, false, true, true, NULL },
6763 { NULL, 0, 0, false, false, false, NULL }
6766 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6768 We add @ to the name if this goes in small data/bss. We can only put
6769 a variable in small data/bss if it is defined in this module or a module
6770 that we are statically linked with. We can't check the second condition,
6771 but TREE_STATIC gives us the first one. */
6773 /* ??? If we had IPA, we could check the second condition. We could support
6774 programmer added section attributes if the variable is not defined in this
6777 /* ??? See the v850 port for a cleaner way to do this. */
6779 /* ??? We could also support own long data here. Generating movl/add/ld8
6780 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6781 code faster because there is one less load. This also includes incomplete
6782 types which can't go in sdata/sbss. */
6784 /* ??? See select_section. We must put short own readonly variables in
6785 sdata/sbss instead of the more natural rodata, because we can't perform
6786 the DECL_READONLY_SECTION test here. */
6788 extern struct obstack * saveable_obstack;
6791 ia64_encode_section_info (decl)
6794 const char *symbol_str;
/* Functions: just set SYMBOL_REF_FLAG (marks function symbols).  */
6796 if (TREE_CODE (decl) == FUNCTION_DECL)
6798 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6802 /* Careful not to prod global register variables. */
6803 if (TREE_CODE (decl) != VAR_DECL
6804 || GET_CODE (DECL_RTL (decl)) != MEM
6805 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6808 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6810 /* We assume that -fpic is used only to create a shared library (dso).
6811 With -fpic, no global data can ever be sdata.
6812 Without -fpic, global common uninitialized data can never be sdata, since
6813 it can unify with a real definition in a dso. */
6814 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6815 to access them. The linker may then be able to do linker relaxation to
6816 optimize references to them. Currently sdata implies use of gprel. */
6817 /* We need the DECL_EXTERNAL check for C++. static class data members get
6818 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6819 statically allocated, but the space is allocated somewhere else. Such
6820 decls can not be own data. */
6821 if (! TARGET_NO_SDATA
6822 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6823 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6824 && ! (TREE_PUBLIC (decl)
6826 || (DECL_COMMON (decl)
6827 && (DECL_INITIAL (decl) == 0
6828 || DECL_INITIAL (decl) == error_mark_node))))
6829 /* Either the variable must be declared without a section attribute,
6830 or the section must be sdata or sbss. */
6831 && (DECL_SECTION_NAME (decl) == 0
6832 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6834 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6837 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6839 /* If the variable has already been defined in the output file, then it
6840 is too late to put it in sdata if it wasn't put there in the first
6841 place. The test is here rather than above, because if it is already
6842 in sdata, then it can stay there. */
6844 if (TREE_ASM_WRITTEN (decl))
6847 /* If this is an incomplete type with size 0, then we can't put it in
6848 sdata because it might be too big when completed. */
6850 && size <= (HOST_WIDE_INT) ia64_section_threshold
6851 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
/* Prepend SDATA_NAME_FLAG_CHAR ('@' per the block comment above) to
   mark the symbol as small data; the new name is GC-allocated.  */
6853 size_t len = strlen (symbol_str);
6854 char *newstr = alloca (len + 1);
6857 *newstr = SDATA_NAME_FLAG_CHAR;
6858 memcpy (newstr + 1, symbol_str, len + 1);
6860 string = ggc_alloc_string (newstr, len + 1);
6861 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6864 /* This decl is marked as being in small data/bss but it shouldn't
6865 be; one likely explanation for this is that the decl has been
6866 moved into a different section from the one it was in when
6867 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
6868 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6870 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6871 = ggc_strdup (symbol_str + 1);
6875 /* Output assembly directives for prologue regions. */
6877 /* The current basic block number. */
6879 static int block_num;
6881 /* True if we need a copy_state command at the start of the next block. */
6883 static int need_copy_state;
6885 /* The function emits unwind directives for the start of an epilogue. */
/* NOTE(review): the function definition line is elided from this
   extract.  Emits .label_state/.restore for the epilogue and flags
   that the next block must .copy_state (see need_copy_state above).  */
6890 /* If this isn't the last block of the function, then we need to label the
6891 current state, and copy it back in at the start of the next block. */
6893 if (block_num != n_basic_blocks - 1)
6895 fprintf (asm_out_file, "\t.label_state 1\n");
6896 need_copy_state = 1;
6899 fprintf (asm_out_file, "\t.restore sp\n");
6902 /* This function processes a SET pattern looking for specific patterns
6903 which result in emitting an assembly directive required for unwinding. */
6906 process_set (asm_out_file, pat)
6910 rtx src = SET_SRC (pat);
6911 rtx dest = SET_DEST (pat);
6912 int src_regno, dest_regno;
6914 /* Look for the ALLOC insn. */
6915 if (GET_CODE (src) == UNSPEC_VOLATILE
6916 && XINT (src, 1) == 0
6917 && GET_CODE (dest) == REG)
6919 dest_regno = REGNO (dest);
6921 /* If this isn't the final destination for ar.pfs, the alloc
6922 shouldn't have been marked frame related. */
6923 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6926 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6927 ia64_dbx_register_number (dest_regno));
6931 /* Look for SP = .... */
6932 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6934 if (GET_CODE (src) == PLUS)
6936 rtx op0 = XEXP (src, 0);
6937 rtx op1 = XEXP (src, 1);
/* sp = sp + const: negative adjustment allocates the frame
   (.fframe); positive (elided here) would be a deallocation.  */
6938 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6940 if (INTVAL (op1) < 0)
6942 fputs ("\t.fframe ", asm_out_file);
6943 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6945 fputc ('\n', asm_out_file);
6948 process_epilogue ();
/* sp = fp restores the stack pointer: epilogue start.  */
6953 else if (GET_CODE (src) == REG
6954 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6955 process_epilogue ();
6962 /* Register move we need to look at. */
6963 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6965 src_regno = REGNO (src);
6966 dest_regno = REGNO (dest);
/* NOTE(review): the switch over src_regno is elided; the visible
   cases handle saves of b0, pr, ar.unat, ar.lc into GRs, and the
   frame-pointer setup (.vframe).  */
6971 /* Saving return address pointer. */
6972 if (dest_regno != current_frame_info.reg_save_b0)
6974 fprintf (asm_out_file, "\t.save rp, r%d\n",
6975 ia64_dbx_register_number (dest_regno));
6979 if (dest_regno != current_frame_info.reg_save_pr)
6981 fprintf (asm_out_file, "\t.save pr, r%d\n",
6982 ia64_dbx_register_number (dest_regno));
6985 case AR_UNAT_REGNUM:
6986 if (dest_regno != current_frame_info.reg_save_ar_unat)
6988 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6989 ia64_dbx_register_number (dest_regno));
6993 if (dest_regno != current_frame_info.reg_save_ar_lc)
6995 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6996 ia64_dbx_register_number (dest_regno));
6999 case STACK_POINTER_REGNUM:
7000 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7001 || ! frame_pointer_needed)
7003 fprintf (asm_out_file, "\t.vframe r%d\n",
7004 ia64_dbx_register_number (dest_regno));
7008 /* Everything else should indicate being stored to memory. */
7013 /* Memory store we need to look at. */
7014 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
/* Decompose the address into BASE (+ OFF); only sp- and fp-based
   saves are expected here.  */
7020 if (GET_CODE (XEXP (dest, 0)) == REG)
7022 base = XEXP (dest, 0);
7025 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7026 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7028 base = XEXP (XEXP (dest, 0), 0);
7029 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7034 if (base == hard_frame_pointer_rtx)
7036 saveop = ".savepsp";
7039 else if (base == stack_pointer_rtx)
7044 src_regno = REGNO (src);
/* NOTE(review): switch over src_regno elided, as above.  */
7048 if (current_frame_info.reg_save_b0 != 0)
7050 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7054 if (current_frame_info.reg_save_pr != 0)
7056 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7060 if (current_frame_info.reg_save_ar_lc != 0)
7062 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7066 if (current_frame_info.reg_save_ar_pfs != 0)
7068 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7071 case AR_UNAT_REGNUM:
7072 if (current_frame_info.reg_save_ar_unat != 0)
7074 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7081 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7082 1 << (src_regno - GR_REG (4)));
7090 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7091 1 << (src_regno - BR_REG (1)));
7098 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7099 1 << (src_regno - FR_REG (2)));
7102 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7103 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7104 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7105 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7106 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7107 1 << (src_regno - FR_REG (12)));
7119 /* This function looks at a single insn and emits any directives
7120 required to unwind this insn. */
7122 process_for_unwind_directive (asm_out_file, insn)
/* Only act when unwind info is actually wanted.  */
7126 if (flag_unwind_tables
7127 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
/* Track basic-block transitions so process_epilogue's label/copy
   protocol (need_copy_state) works across blocks.  */
7131 if (GET_CODE (insn) == NOTE
7132 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7134 block_num = NOTE_BASIC_BLOCK (insn)->index;
7136 /* Restore unwind state from immediately before the epilogue. */
7137 if (need_copy_state)
7139 fprintf (asm_out_file, "\t.body\n");
7140 fprintf (asm_out_file, "\t.copy_state 1\n");
7141 need_copy_state = 0;
7145 if (! RTX_FRAME_RELATED_P (insn))
/* Prefer the REG_FRAME_RELATED_EXPR note over the raw pattern.  */
7148 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7150 pat = XEXP (pat, 0);
7152 pat = PATTERN (insn);
7154 switch (GET_CODE (pat))
7157 process_set (asm_out_file, pat);
/* PARALLEL: process each SET element individually.  */
7163 int limit = XVECLEN (pat, 0);
7164 for (par_index = 0; par_index < limit; par_index++)
7166 rtx x = XVECEXP (pat, 0, par_index);
7167 if (GET_CODE (x) == SET)
7168 process_set (asm_out_file, x);
/* Register the IA-64 machine-specific builtins: __sync_* atomics in SI
   and DI flavors, plus __builtin_ia64_bsp and __builtin_ia64_flushrs.
   Builds the required function type nodes first.  */
7181 ia64_init_builtins ()
7183 tree psi_type_node = build_pointer_type (integer_type_node);
7184 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7185 tree endlink = void_list_node;
7187 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7188 tree si_ftype_psi_si_si
7189 = build_function_type (integer_type_node,
7190 tree_cons (NULL_TREE, psi_type_node,
7191 tree_cons (NULL_TREE, integer_type_node,
7192 tree_cons (NULL_TREE,
7196 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7197 tree di_ftype_pdi_di_di
7198 = build_function_type (long_integer_type_node,
7199 tree_cons (NULL_TREE, pdi_type_node,
7200 tree_cons (NULL_TREE,
7201 long_integer_type_node,
7202 tree_cons (NULL_TREE,
7203 long_integer_type_node,
7205 /* __sync_synchronize */
7206 tree void_ftype_void
7207 = build_function_type (void_type_node, endlink);
7209 /* __sync_lock_test_and_set_si */
7210 tree si_ftype_psi_si
7211 = build_function_type (integer_type_node,
7212 tree_cons (NULL_TREE, psi_type_node,
7213 tree_cons (NULL_TREE, integer_type_node, endlink)));
7215 /* __sync_lock_test_and_set_di */
7216 tree di_ftype_pdi_di
7217 = build_function_type (long_integer_type_node,
7218 tree_cons (NULL_TREE, pdi_type_node,
7219 tree_cons (NULL_TREE, long_integer_type_node,
7222 /* __sync_lock_release_si */
7224 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7227 /* __sync_lock_release_di */
7229 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7232 #define def_builtin(name, type, code) \
7233 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7235 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7236 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7237 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7238 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7239 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7240 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7241 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7242 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7244 def_builtin ("__sync_synchronize", void_ftype_void,
7245 IA64_BUILTIN_SYNCHRONIZE);
7247 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7248 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7249 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7250 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7251 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7252 IA64_BUILTIN_LOCK_RELEASE_SI);
7253 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7254 IA64_BUILTIN_LOCK_RELEASE_DI);
7256 def_builtin ("__builtin_ia64_bsp",
7257 build_function_type (ptr_type_node, endlink),
7260 def_builtin ("__builtin_ia64_flushrs",
7261 build_function_type (void_type_node, endlink),
7262 IA64_BUILTIN_FLUSHRS);
7264 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7265 IA64_BUILTIN_FETCH_AND_ADD_SI);
7266 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7267 IA64_BUILTIN_FETCH_AND_SUB_SI);
7268 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7269 IA64_BUILTIN_FETCH_AND_OR_SI);
7270 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7271 IA64_BUILTIN_FETCH_AND_AND_SI);
7272 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7273 IA64_BUILTIN_FETCH_AND_XOR_SI);
7274 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7275 IA64_BUILTIN_FETCH_AND_NAND_SI);
7277 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7278 IA64_BUILTIN_ADD_AND_FETCH_SI);
7279 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7280 IA64_BUILTIN_SUB_AND_FETCH_SI);
7281 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7282 IA64_BUILTIN_OR_AND_FETCH_SI);
7283 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7284 IA64_BUILTIN_AND_AND_FETCH_SI);
7285 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7286 IA64_BUILTIN_XOR_AND_FETCH_SI);
7287 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7288 IA64_BUILTIN_NAND_AND_FETCH_SI);
7290 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7291 IA64_BUILTIN_FETCH_AND_ADD_DI);
7292 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7293 IA64_BUILTIN_FETCH_AND_SUB_DI);
7294 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7295 IA64_BUILTIN_FETCH_AND_OR_DI);
7296 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7297 IA64_BUILTIN_FETCH_AND_AND_DI);
7298 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7299 IA64_BUILTIN_FETCH_AND_XOR_DI);
7300 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7301 IA64_BUILTIN_FETCH_AND_NAND_DI);
7303 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7304 IA64_BUILTIN_ADD_AND_FETCH_DI);
7305 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7306 IA64_BUILTIN_SUB_AND_FETCH_DI);
7307 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7308 IA64_BUILTIN_OR_AND_FETCH_DI);
7309 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7310 IA64_BUILTIN_AND_AND_FETCH_DI);
7311 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7312 IA64_BUILTIN_XOR_AND_FETCH_DI);
7313 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7314 IA64_BUILTIN_NAND_AND_FETCH_DI);
7319 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7327 cmpxchgsz.acq tmp = [ptr], tmp
7328 } while (tmp != ret)
/* Returns RET = the value at *ptr before the operation.  NAND is
   passed in as one_cmpl_optab and rewritten to complement+AND below.  */
7332 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7334 enum machine_mode mode;
7338 rtx ret, label, tmp, ccv, insn, mem, value;
7341 arg0 = TREE_VALUE (arglist);
7342 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7343 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7344 value = expand_expr (arg1, NULL_RTX, mode, 0);
7346 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7347 MEM_VOLATILE_P (mem) = 1;
7349 if (target && register_operand (target, mode))
7352 ret = gen_reg_rtx (mode);
/* Memory fence before the atomic sequence.  */
7354 emit_insn (gen_mf ());
7356 /* Special case for fetchadd instructions. */
7357 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7360 insn = gen_fetchadd_acq_si (ret, mem, value);
7362 insn = gen_fetchadd_acq_di (ret, mem, value);
/* General case: cmpxchg retry loop keyed on ar.ccv.  */
7367 tmp = gen_reg_rtx (mode);
7368 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7369 emit_move_insn (tmp, mem);
7371 label = gen_label_rtx ();
7373 emit_move_insn (ret, tmp);
7374 emit_move_insn (ccv, tmp);
7376 /* Perform the specific operation. Special case NAND by noticing
7377 one_cmpl_optab instead. */
7378 if (binoptab == one_cmpl_optab)
7380 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7381 binoptab = and_optab;
7383 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7386 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7388 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
/* Loop until the cmpxchg observed the value we computed from.  */
7391 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
7396 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7404 cmpxchgsz.acq tmp = [ptr], ret
7405 } while (tmp != old)
/* Returns RET = the value at *ptr after the operation.  Mirrors
   ia64_expand_fetch_and_op but stores the post-op value.  */
7409 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7411 enum machine_mode mode;
7415 rtx old, label, tmp, ret, ccv, insn, mem, value;
7418 arg0 = TREE_VALUE (arglist);
7419 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7420 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7421 value = expand_expr (arg1, NULL_RTX, mode, 0);
7423 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7424 MEM_VOLATILE_P (mem) = 1;
7426 if (target && ! register_operand (target, mode))
7429 emit_insn (gen_mf ());
7430 tmp = gen_reg_rtx (mode);
7431 old = gen_reg_rtx (mode);
7432 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7434 emit_move_insn (tmp, mem);
7436 label = gen_label_rtx ();
7438 emit_move_insn (old, tmp);
7439 emit_move_insn (ccv, tmp);
7441 /* Perform the specific operation. Special case NAND by noticing
7442 one_cmpl_optab instead. */
7443 if (binoptab == one_cmpl_optab)
7445 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7446 binoptab = and_optab;
7448 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7451 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7453 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
/* Retry while another thread changed *ptr since we sampled OLD.  */
7456 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
7461 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7465 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7468 For bool_ it's the same except return ret == oldval.
/* BOOLP selects the bool_ variant (returns a truth value) versus the
   val_ variant (returns the old memory contents).  */
7472 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7473 enum machine_mode mode;
7478 tree arg0, arg1, arg2;
7479 rtx mem, old, new, ccv, tmp, insn;
7481 arg0 = TREE_VALUE (arglist);
7482 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7483 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7484 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7485 old = expand_expr (arg1, NULL_RTX, mode, 0);
7486 new = expand_expr (arg2, NULL_RTX, mode, 0);
7488 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7489 MEM_VOLATILE_P (mem) = 1;
7491 if (! register_operand (old, mode))
7492 old = copy_to_mode_reg (mode, old);
7493 if (! register_operand (new, mode))
7494 new = copy_to_mode_reg (mode, new);
7496 if (! boolp && target && register_operand (target, mode))
7499 tmp = gen_reg_rtx (mode);
/* cmpxchg compares against ar.ccv, so seed it with OLD.  */
7501 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7502 emit_move_insn (ccv, old);
7503 emit_insn (gen_mf ());
7505 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7507 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
/* bool_ variant: fold the returned old value into (tmp == old).  */
7513 target = gen_reg_rtx (mode);
7514 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7520 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7523 ia64_expand_lock_test_and_set (mode, arglist, target)
7524 enum machine_mode mode;
7529 rtx mem, new, ret, insn;
7531 arg0 = TREE_VALUE (arglist);
7532 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7533 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7534 new = expand_expr (arg1, NULL_RTX, mode, 0);
7536 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7537 MEM_VOLATILE_P (mem) = 1;
7538 if (! register_operand (new, mode))
7539 new = copy_to_mode_reg (mode, new);
/* Reuse TARGET when it is a register; otherwise make a fresh one.  */
7541 if (target && register_operand (target, mode))
7544 ret = gen_reg_rtx (mode);
7547 insn = gen_xchgsi (ret, mem, new);
7549 insn = gen_xchgdi (ret, mem, new);
7555 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7558 ia64_expand_lock_release (mode, arglist, target)
7559 enum machine_mode mode;
7561 rtx target ATTRIBUTE_UNUSED;
7566 arg0 = TREE_VALUE (arglist);
7567 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7569 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7570 MEM_VOLATILE_P (mem) = 1;
/* A volatile store of zero; the backend emits it as a release store.  */
7572 emit_move_insn (mem, const0_rtx);
7578 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7581 rtx subtarget ATTRIBUTE_UNUSED;
7582 enum machine_mode mode ATTRIBUTE_UNUSED;
7583 int ignore ATTRIBUTE_UNUSED;
7585 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7586 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7587 tree arglist = TREE_OPERAND (exp, 1);
7591 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7592 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7593 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7594 case IA64_BUILTIN_LOCK_RELEASE_SI:
7595 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7596 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7597 case IA64_BUILTIN_FETCH_AND_OR_SI:
7598 case IA64_BUILTIN_FETCH_AND_AND_SI:
7599 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7600 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7601 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7602 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7603 case IA64_BUILTIN_OR_AND_FETCH_SI:
7604 case IA64_BUILTIN_AND_AND_FETCH_SI:
7605 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7606 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7610 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7611 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7612 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7613 case IA64_BUILTIN_LOCK_RELEASE_DI:
7614 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7615 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7616 case IA64_BUILTIN_FETCH_AND_OR_DI:
7617 case IA64_BUILTIN_FETCH_AND_AND_DI:
7618 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7619 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7620 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7621 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7622 case IA64_BUILTIN_OR_AND_FETCH_DI:
7623 case IA64_BUILTIN_AND_AND_FETCH_DI:
7624 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7625 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7635 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7636 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7637 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7639 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7640 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7641 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7643 case IA64_BUILTIN_SYNCHRONIZE:
7644 emit_insn (gen_mf ());
7647 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7648 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7649 return ia64_expand_lock_test_and_set (mode, arglist, target);
7651 case IA64_BUILTIN_LOCK_RELEASE_SI:
7652 case IA64_BUILTIN_LOCK_RELEASE_DI:
7653 return ia64_expand_lock_release (mode, arglist, target);
7655 case IA64_BUILTIN_BSP:
7656 if (! target || ! register_operand (target, DImode))
7657 target = gen_reg_rtx (DImode);
7658 emit_insn (gen_bsp_value (target));
7661 case IA64_BUILTIN_FLUSHRS:
7662 emit_insn (gen_flushrs ());
7665 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7666 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7667 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7669 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7670 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7671 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7673 case IA64_BUILTIN_FETCH_AND_OR_SI:
7674 case IA64_BUILTIN_FETCH_AND_OR_DI:
7675 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7677 case IA64_BUILTIN_FETCH_AND_AND_SI:
7678 case IA64_BUILTIN_FETCH_AND_AND_DI:
7679 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7681 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7682 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7683 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7685 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7686 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7687 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7689 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7690 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7691 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
7693 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7694 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7695 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
7697 case IA64_BUILTIN_OR_AND_FETCH_SI:
7698 case IA64_BUILTIN_OR_AND_FETCH_DI:
7699 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
7701 case IA64_BUILTIN_AND_AND_FETCH_SI:
7702 case IA64_BUILTIN_AND_AND_FETCH_DI:
7703 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
7705 case IA64_BUILTIN_XOR_AND_FETCH_SI:
7706 case IA64_BUILTIN_XOR_AND_FETCH_DI:
7707 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
7709 case IA64_BUILTIN_NAND_AND_FETCH_SI:
7710 case IA64_BUILTIN_NAND_AND_FETCH_DI:
7711 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);