/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "target.h"
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static int ia64_valid_type_attribute PARAMS ((tree, tree, tree, tree));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
                                                int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
static rtx ia64_cycle_display PARAMS ((int, rtx));

/* Initialize the GCC target structure.  */
#undef TARGET_VALID_TYPE_ATTRIBUTE
#define TARGET_VALID_TYPE_ATTRIBUTE ia64_valid_type_attribute

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
#undef TARGET_SCHED_CYCLE_DISPLAY
#define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
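
/* Illustrative note (not from the original sources): forcing the low 14
   bits of the addend to zero means a constant like "sym+0x4000" can be a
   GOT operand in its own right, but "sym+8" cannot; the latter must be
   materialized as a GOT load of "sym" plus an add, so all the nearby
   offsets share a single GOT entry.  */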

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
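
/* Example of the prefix stripping above (illustrative): "__xsetjmp",
   "__setjmp" and "_setjmp" all reduce to "setjmp", so the leading
   underscore variants emitted by some C libraries are matched too.  */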

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}
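
/* For reference (inferred from how the predicates above use them; the
   exact definitions live in ia64.h): CONST_OK_FOR_I accepts 14-bit
   signed immediates (the "adds" form), CONST_OK_FOR_J 22-bit signed
   immediates ("addl"), CONST_OK_FOR_K 8-bit signed immediates for the
   logical insns, CONST_OK_FOR_L the 8-bit "adjusted" form used by
   compares, and CONST_OK_FOR_M shift counts 0..63.  */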

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
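
/* Illustrative: shladd computes r1 = (r2 << count) + r3, and the
   multipliers 2, 4, 8, 16 accepted here correspond to the shift counts
   1 through 4 that the instruction can encode.  */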

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8
              || INTVAL (op) == -4 || INTVAL (op) == -1
              || INTVAL (op) == 1 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
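
/* These are the only increments the IA-64 fetchadd instruction can
   encode; other atomic additions are typically synthesized with
   compare-and-swap loops instead.  */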

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
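
/* 'a' is the rtx class code for autoincrement addresses (POST_INC,
   POST_DEC, POST_MODIFY and friends), so this rejects exactly the
   post-increment address forms.  */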

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
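
/* Worked example (illustrative): for rop == 0xff0 and rshift == 4,
   op >>= shift leaves 0xff, and exact_log2 (0xff + 1) == 8, so the
   dep.z field is 8 bits wide.  A mask like 0xf0f0 with shift 4 fails:
   0xf0f + 1 is not a power of two, so exact_log2 returns -1.  */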

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
        scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
                                  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
        scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}
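
/* Worked example for the offset split above (illustrative): with
   ofs == 0x2001 we get lo == ((0x2001 & 0x3fff) ^ 0x2000) - 0x2000
   == -0x1fff and hi == ofs - lo == 0x4000.  lo always lands in the
   sign-extended 14-bit range [-0x2000, 0x1fff] and hi has its low 14
   bits clear, so hi + lo reconstructs the original offset exactly.  */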

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = adjust_address (in, DImode, 0);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
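
/* Example (illustrative): a register source such as (reg:TI r32) splits
   into (reg:DI r32) and (reg:DI r33), while a (mem:TI (post_inc ...))
   source is rewritten with a POST_MODIFY of 16 so the full TImode
   increment survives the change to DImode accesses.  */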

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
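
/* A minimal usage sketch (not from the original sources): a branch or
   scc expander stores its operands in ia64_compare_op0/op1 and then
   calls, say,

     rtx cond = ia64_expand_compare (GT, VOIDmode);

   after which COND is an (ne (reg:BI pN) (const_int 0)) style expression
   that can sit directly in an if_then_else pattern.  */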

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
        emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
        insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_pic (dest, narg_rtx, b0);
      else
        insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
        emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
        {
          emit_move_insn (gp_save, pic_offset_table_rtx);

          if (! retval)
            insn = gen_call_pic (addr, narg_rtx, b0);
          else
            insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
          emit_call_insn (insn);

          emit_move_insn (pic_offset_table_rtx, gp_save);
        }
    }
}

/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */
  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
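
/* Example (illustrative): in a non-leaf function with total_size == 64
   and 16 bytes of outgoing arguments, eliminating FRAME_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 16 + 16 == 32, i.e. locals sit above
   the outgoing argument block and the 16-byte scratch area.  */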

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
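
/* Sketch of the intended flow (illustrative): a prologue with many
   saves alternates between iter_reg[0] and iter_reg[1], so consecutive
   st8.spill insns address memory through different registers and can
   issue in the same insn group; each store is later rewritten as a
   POST_MODIFY that steps its own iterator by 16, since the two
   iterators interleave over adjacent 8-byte slots.  */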

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          REG_NOTES (spill_fill_data.prev_insn[iter])
            = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
                                 REG_NOTES (spill_fill_data.prev_insn[iter]));
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx first = get_insns ();
          if (first)
            spill_fill_data.init_after
              = emit_insn_before (seq, first);
          else
            spill_fill_data.init_after = emit_insn (seq);
        }
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
        regno = current_frame_info.reg_save_ar_pfs;
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          if (GET_CODE (offset) != CONST_INT)
            {
              REG_NOTES (insn)
                = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                     gen_rtx_SET (VOIDmode,
                                                  stack_pointer_rtx,
                                                  gen_rtx_PLUS (DImode,
                                                                stack_pointer_rtx,
                                                                frame_size_rtx)),
                                     REG_NOTES (insn));
            }
        }

      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
        emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode, alt_reg, reg),
                                 REG_NOTES (insn));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }
2193 /* We should now be at the base of the gr/br/fr spill area. */
2194 if (cfa_off != (current_frame_info.spill_cfa_off
2195 + current_frame_info.spill_size))
2198 /* Spill all general registers. */
2199 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2200 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2202 reg = gen_rtx_REG (DImode, regno);
2203 do_spill (gen_gr_spill, reg, cfa_off, reg);
2207 /* Handle BR0 specially -- it may be getting stored permanently in
2208 some GR register. */
2209 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2211 reg = gen_rtx_REG (DImode, BR_REG (0));
2212 if (current_frame_info.reg_save_b0 != 0)
2214 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2215 insn = emit_move_insn (alt_reg, reg);
2216 RTX_FRAME_RELATED_P (insn) = 1;
2218 /* Even if we're not going to generate an epilogue, we still
2219 need to save the register so that EH works. */
2221 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2225 alt_regno = next_scratch_gr_reg ();
2226 alt_reg = gen_rtx_REG (DImode, alt_regno);
2227 emit_move_insn (alt_reg, reg);
2228 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2233 /* Spill the rest of the BR registers. */
2234 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2235 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2237 alt_regno = next_scratch_gr_reg ();
2238 alt_reg = gen_rtx_REG (DImode, alt_regno);
2239 reg = gen_rtx_REG (DImode, regno);
2240 emit_move_insn (alt_reg, reg);
2241 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2245 /* Align the frame and spill all FR registers. */
2246 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2247 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2251 reg = gen_rtx_REG (TFmode, regno);
2252 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2256 if (cfa_off != current_frame_info.spill_cfa_off)
2259 finish_spill_pointers ();
2262 /* Called after register allocation to add any instructions needed for the
2263 epilogue.  Using an epilogue insn is favored compared to putting all of the
2264 instructions in output_function_epilogue(), since it allows the scheduler
2265 to intermix instructions with the restores of the caller saved registers.  In
2266 some cases, it might be necessary to emit a barrier instruction as the last
2267 insn to prevent such scheduling. */
2270 ia64_expand_epilogue (sibcall_p)
2273 rtx insn, reg, alt_reg, ar_unat_save_reg;
2274 int regno, alt_regno, cfa_off;
2276 ia64_compute_frame_size (get_frame_size ());
2278 /* If there is a frame pointer, then we use it instead of the stack
2279 pointer, so that the stack pointer does not need to be valid when
2280 the epilogue starts. See EXIT_IGNORE_STACK. */
2281 if (frame_pointer_needed)
2282 setup_spill_pointers (current_frame_info.n_spilled,
2283 hard_frame_pointer_rtx, 0);
2285 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2286 current_frame_info.total_size);
2288 if (current_frame_info.total_size != 0)
2290 /* ??? At this point we must generate a magic insn that appears to
2291 modify the spill iterators and the frame pointer. This would
2292 allow the most scheduling freedom. For now, just hard stop. */
2293 emit_insn (gen_blockage ());
2296 /* Locate the bottom of the register save area. */
2297 cfa_off = (current_frame_info.spill_cfa_off
2298 + current_frame_info.spill_size
2299 + current_frame_info.extra_spill_size);
2301 /* Restore the predicate registers. */
2302 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2304 if (current_frame_info.reg_save_pr != 0)
2305 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2308 alt_regno = next_scratch_gr_reg ();
2309 alt_reg = gen_rtx_REG (DImode, alt_regno);
2310 do_restore (gen_movdi_x, alt_reg, cfa_off);
2313 reg = gen_rtx_REG (DImode, PR_REG (0));
2314 emit_move_insn (reg, alt_reg);
2317 /* Restore the application registers. */
2319 /* Load the saved unat from the stack, but do not restore it until
2320 after the GRs have been restored. */
2321 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2323 if (current_frame_info.reg_save_ar_unat != 0)
2325 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2328 alt_regno = next_scratch_gr_reg ();
2329 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2330 current_frame_info.gr_used_mask |= 1 << alt_regno;
2331 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2336 ar_unat_save_reg = NULL_RTX;
2338 if (current_frame_info.reg_save_ar_pfs != 0)
2340 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2341 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2342 emit_move_insn (reg, alt_reg);
2344 else if (! current_function_is_leaf)
2346 alt_regno = next_scratch_gr_reg ();
2347 alt_reg = gen_rtx_REG (DImode, alt_regno);
2348 do_restore (gen_movdi_x, alt_reg, cfa_off);
2350 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2351 emit_move_insn (reg, alt_reg);
2354 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2356 if (current_frame_info.reg_save_ar_lc != 0)
2357 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2360 alt_regno = next_scratch_gr_reg ();
2361 alt_reg = gen_rtx_REG (DImode, alt_regno);
2362 do_restore (gen_movdi_x, alt_reg, cfa_off);
2365 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2366 emit_move_insn (reg, alt_reg);
2369 /* We should now be at the base of the gr/br/fr spill area. */
2370 if (cfa_off != (current_frame_info.spill_cfa_off
2371 + current_frame_info.spill_size))
2374 /* Restore all general registers. */
2375 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2376 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2378 reg = gen_rtx_REG (DImode, regno);
2379 do_restore (gen_gr_restore, reg, cfa_off);
2383 /* Restore the branch registers. Handle B0 specially, as it may
2384 have gotten stored in some GR register. */
2385 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2387 if (current_frame_info.reg_save_b0 != 0)
2388 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2391 alt_regno = next_scratch_gr_reg ();
2392 alt_reg = gen_rtx_REG (DImode, alt_regno);
2393 do_restore (gen_movdi_x, alt_reg, cfa_off);
2396 reg = gen_rtx_REG (DImode, BR_REG (0));
2397 emit_move_insn (reg, alt_reg);
2400 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2401 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2403 alt_regno = next_scratch_gr_reg ();
2404 alt_reg = gen_rtx_REG (DImode, alt_regno);
2405 do_restore (gen_movdi_x, alt_reg, cfa_off);
2407 reg = gen_rtx_REG (DImode, regno);
2408 emit_move_insn (reg, alt_reg);
2411 /* Restore floating point registers. */
2412 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2413 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2417 reg = gen_rtx_REG (TFmode, regno);
2418 do_restore (gen_fr_restore_x, reg, cfa_off);
2422 /* Restore ar.unat for real. */
2423 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2425 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2426 emit_move_insn (reg, ar_unat_save_reg);
2429 if (cfa_off != current_frame_info.spill_cfa_off)
2432 finish_spill_pointers ();
2434 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2436 /* ??? At this point we must generate a magic insn that appears to
2437 modify the spill iterators, the stack pointer, and the frame
2438 pointer.  This would allow the most scheduling freedom.  For now,
just hard stop.  */
2440 emit_insn (gen_blockage ());
2443 if (cfun->machine->ia64_eh_epilogue_sp)
2444 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2445 else if (frame_pointer_needed)
2447 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2448 RTX_FRAME_RELATED_P (insn) = 1;
2450 else if (current_frame_info.total_size)
2452 rtx offset, frame_size_rtx;
2454 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2455 if (CONST_OK_FOR_I (current_frame_info.total_size))
2456 offset = frame_size_rtx;
2459 regno = next_scratch_gr_reg ();
2460 offset = gen_rtx_REG (DImode, regno);
2461 emit_move_insn (offset, frame_size_rtx);
2464 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2467 RTX_FRAME_RELATED_P (insn) = 1;
2468 if (GET_CODE (offset) != CONST_INT)
2471 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2472 gen_rtx_SET (VOIDmode,
2474 gen_rtx_PLUS (DImode,
2481 if (cfun->machine->ia64_eh_epilogue_bsp)
2482 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2485 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2488 int fp = GR_REG (2);
2489 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2490 first available call clobbered register. If there was a frame_pointer
2491 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2492 so we have to make sure we're using the string "r2" when emitting
2493 the register name for the assembler.  */
2494 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2495 fp = HARD_FRAME_POINTER_REGNUM;
2497 /* We must emit an alloc to force the input registers to become output
2498 registers. Otherwise, if the callee tries to pass its parameters
2499 through to another call without an intervening alloc, then these
parameters will be undefined.  */
2501 /* ??? We don't need to preserve all input registers. We only need to
2502 preserve those input registers used as arguments to the sibling call.
2503 It is unclear how to compute that number here. */
2504 if (current_frame_info.n_input_regs != 0)
2505 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2506 GEN_INT (0), GEN_INT (0),
2507 GEN_INT (current_frame_info.n_input_regs),
2512 /* Return 1 if br.ret can do all the work required to return from a
function.  */
2516 ia64_direct_return ()
2518 if (reload_completed && ! frame_pointer_needed)
2520 ia64_compute_frame_size (get_frame_size ());
2522 return (current_frame_info.total_size == 0
2523 && current_frame_info.n_spilled == 0
2524 && current_frame_info.reg_save_b0 == 0
2525 && current_frame_info.reg_save_pr == 0
2526 && current_frame_info.reg_save_ar_pfs == 0
2527 && current_frame_info.reg_save_ar_unat == 0
2528 && current_frame_info.reg_save_ar_lc == 0);
2534 ia64_hard_regno_rename_ok (from, to)
2538 /* Don't clobber any of the registers we reserved for the prologue. */
2539 if (to == current_frame_info.reg_fp
2540 || to == current_frame_info.reg_save_b0
2541 || to == current_frame_info.reg_save_pr
2542 || to == current_frame_info.reg_save_ar_pfs
2543 || to == current_frame_info.reg_save_ar_unat
2544 || to == current_frame_info.reg_save_ar_lc)
2547 if (from == current_frame_info.reg_fp
2548 || from == current_frame_info.reg_save_b0
2549 || from == current_frame_info.reg_save_pr
2550 || from == current_frame_info.reg_save_ar_pfs
2551 || from == current_frame_info.reg_save_ar_unat
2552 || from == current_frame_info.reg_save_ar_lc)
2555 /* Don't use output registers outside the register frame. */
2556 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2559 /* Retain even/oddness on predicate register pairs. */
2560 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2561 return (from & 1) == (to & 1);
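/* Illustrative example (not from the original source): renaming p6 to p8
   preserves even/oddness and is allowed, while renaming p6 to p7 is not,
   since an even/odd pair such as p6/p7 may hold a compare result and its
   complement.  */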
2563 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2564 if (from == GR_REG (4) && current_function_calls_setjmp)
2570 /* Emit the function prologue. */
2573 ia64_output_function_prologue (file, size)
2575 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2577 int mask, grsave, grsave_prev;
2579 if (current_frame_info.need_regstk)
2580 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2581 current_frame_info.n_input_regs,
2582 current_frame_info.n_local_regs,
2583 current_frame_info.n_output_regs,
2584 current_frame_info.n_rotate_regs);
2586 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2589 /* Emit the .prologue directive. */
2592 grsave = grsave_prev = 0;
2593 if (current_frame_info.reg_save_b0 != 0)
2596 grsave = grsave_prev = current_frame_info.reg_save_b0;
2598 if (current_frame_info.reg_save_ar_pfs != 0
2599 && (grsave_prev == 0
2600 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2603 if (grsave_prev == 0)
2604 grsave = current_frame_info.reg_save_ar_pfs;
2605 grsave_prev = current_frame_info.reg_save_ar_pfs;
2607 if (current_frame_info.reg_fp != 0
2608 && (grsave_prev == 0
2609 || current_frame_info.reg_fp == grsave_prev + 1))
2612 if (grsave_prev == 0)
2613 grsave = HARD_FRAME_POINTER_REGNUM;
2614 grsave_prev = current_frame_info.reg_fp;
2616 if (current_frame_info.reg_save_pr != 0
2617 && (grsave_prev == 0
2618 || current_frame_info.reg_save_pr == grsave_prev + 1))
2621 if (grsave_prev == 0)
2622 grsave = current_frame_info.reg_save_pr;
2626 fprintf (file, "\t.prologue %d, %d\n", mask,
2627 ia64_dbx_register_number (grsave));
2629 fputs ("\t.prologue\n", file);
2631 /* Emit a .spill directive, if necessary, to relocate the base of
2632 the register spill area. */
2633 if (current_frame_info.spill_cfa_off != -16)
2634 fprintf (file, "\t.spill %ld\n",
2635 (long) (current_frame_info.spill_cfa_off
2636 + current_frame_info.spill_size));
2639 /* Emit the .body directive at the scheduled end of the prologue. */
2642 ia64_output_function_end_prologue (file)
2645 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2648 fputs ("\t.body\n", file);
2651 /* Emit the function epilogue. */
2654 ia64_output_function_epilogue (file, size)
2655 FILE *file ATTRIBUTE_UNUSED;
2656 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2660 /* Reset from the function's potential modifications. */
2661 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2663 if (current_frame_info.reg_fp)
2665 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2666 reg_names[HARD_FRAME_POINTER_REGNUM]
2667 = reg_names[current_frame_info.reg_fp];
2668 reg_names[current_frame_info.reg_fp] = tmp;
2670 if (! TARGET_REG_NAMES)
2672 for (i = 0; i < current_frame_info.n_input_regs; i++)
2673 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2674 for (i = 0; i < current_frame_info.n_local_regs; i++)
2675 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2676 for (i = 0; i < current_frame_info.n_output_regs; i++)
2677 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2680 current_frame_info.initialized = 0;
2684 ia64_dbx_register_number (regno)
2687 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2688 from its home at loc79 to something inside the register frame. We
2689 must perform the same renumbering here for the debug info. */
2690 if (current_frame_info.reg_fp)
2692 if (regno == HARD_FRAME_POINTER_REGNUM)
2693 regno = current_frame_info.reg_fp;
2694 else if (regno == current_frame_info.reg_fp)
2695 regno = HARD_FRAME_POINTER_REGNUM;
2698 if (IN_REGNO_P (regno))
2699 return 32 + regno - IN_REG (0);
2700 else if (LOC_REGNO_P (regno))
2701 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2702 else if (OUT_REGNO_P (regno))
2703 return (32 + current_frame_info.n_input_regs
2704 + current_frame_info.n_local_regs + regno - OUT_REG (0));
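/* Worked example for the mapping above (hypothetical frame, not from the
   original source): with 2 input and 3 local registers, in0/in1 map to
   debug register numbers 32/33, loc0..loc2 map to 34..36, and out0 maps
   to 37.  */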
2710 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2711 rtx addr, fnaddr, static_chain;
2713 rtx addr_reg, eight = GEN_INT (8);
2715 /* Load up our iterator. */
2716 addr_reg = gen_reg_rtx (Pmode);
2717 emit_move_insn (addr_reg, addr);
2719 /* The first two words are the fake descriptor:
2720 __ia64_trampoline, ADDR+16. */
2721 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2722 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2723 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2725 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2726 copy_to_reg (plus_constant (addr, 16)));
2727 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2729 /* The third word is the target descriptor. */
2730 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2731 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2733 /* The fourth word is the static chain. */
2734 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
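/* Summary of the trampoline layout built above, assembled from the
   comments in this function (a sketch, not original text):

     [ADDR+ 0]  __ia64_trampoline   \  fake function descriptor
     [ADDR+ 8]  ADDR+16             /
     [ADDR+16]  FNADDR                 target descriptor
     [ADDR+24]  STATIC_CHAIN           fourth word  */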
2737 /* Do any needed setup for a variadic function. CUM has not been updated
2738 for the last named argument which has type TYPE and mode MODE.
2740 We generate the actual spill instructions during prologue generation. */
2743 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2744 CUMULATIVE_ARGS cum;
2748 int second_time ATTRIBUTE_UNUSED;
2750 /* If this is a stdarg function, then skip the current argument. */
2751 if (! current_function_varargs)
2752 ia64_function_arg_advance (&cum, int_mode, type, 1);
2754 if (cum.words < MAX_ARGUMENT_SLOTS)
2756 int n = MAX_ARGUMENT_SLOTS - cum.words;
2757 *pretend_size = n * UNITS_PER_WORD;
2758 cfun->machine->n_varargs = n;
2762 /* Check whether TYPE is a homogeneous floating point aggregate.  If
2763 it is, return the mode of the floating point type that appears
2764 in all leaves.  If it is not, return VOIDmode.
2766 An aggregate is a homogeneous floating point aggregate if all
2767 fields/elements in it have the same floating point type (e.g.,
2768 SFmode).  128-bit quad-precision floats are excluded.  */
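/* Illustrative examples (not from the original source):

     struct rgb { float r, g, b; };      -- HFA; element mode SFmode
     struct p   { float x; double y; };  -- not an HFA (mixed FP types)
     _Complex double z;                  -- treated as an HFA of DFmode

   hfa_element_mode returns SFmode, VOIDmode, and DFmode respectively.  */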
2770 static enum machine_mode
2771 hfa_element_mode (type, nested)
2775 enum machine_mode element_mode = VOIDmode;
2776 enum machine_mode mode;
2777 enum tree_code code = TREE_CODE (type);
2778 int know_element_mode = 0;
2783 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2784 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2785 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2786 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2790 /* Fortran complex types are supposed to be HFAs, so we need to handle
2791 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
types.  */
2794 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2795 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2796 * BITS_PER_UNIT, MODE_FLOAT, 0);
2801 /* ??? Should exclude 128-bit long double here. */
2802 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2803 mode if this is contained within an aggregate. */
2805 return TYPE_MODE (type);
2810 return TYPE_MODE (TREE_TYPE (type));
2814 case QUAL_UNION_TYPE:
2815 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2817 if (TREE_CODE (t) != FIELD_DECL)
2820 mode = hfa_element_mode (TREE_TYPE (t), 1);
2821 if (know_element_mode)
2823 if (mode != element_mode)
2826 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2830 know_element_mode = 1;
2831 element_mode = mode;
2834 return element_mode;
2837 /* If we reach here, we probably have some front-end specific type
2838 that the backend doesn't know about. This can happen via the
2839 aggregate_value_p call in init_function_start. All we can do is
2840 ignore unknown tree types. */
2847 /* Return rtx for register where argument is passed, or zero if it is passed
on the stack.  */
2850 /* ??? 128-bit quad-precision floats are always passed in general
registers.  */
2854 ia64_function_arg (cum, mode, type, named, incoming)
2855 CUMULATIVE_ARGS *cum;
2856 enum machine_mode mode;
2861 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2862 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2863 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2866 enum machine_mode hfa_mode = VOIDmode;
2868 /* Integer and float arguments larger than 8 bytes start at the next even
2869 boundary. Aggregates larger than 8 bytes start at the next even boundary
2870 if the aggregate has 16 byte alignment. Net effect is that types with
2871 alignment greater than 8 start at the next even boundary. */
2872 /* ??? The ABI does not specify how to handle aggregates with alignment from
2873 9 to 15 bytes, or greater than 16. We handle them all as if they had
2874 16 byte alignment.  Such aggregates can occur only if gcc extensions are
used.  */
2876 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2878 && (cum->words & 1))
2881 /* If all argument slots are used, then it must go on the stack. */
2882 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2885 /* Check for and handle homogeneous FP aggregates. */
2887 hfa_mode = hfa_element_mode (type, 0);
2889 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2890 and unprototyped hfas are passed specially. */
2891 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2895 int fp_regs = cum->fp_regs;
2896 int int_regs = cum->words + offset;
2897 int hfa_size = GET_MODE_SIZE (hfa_mode);
2901 /* If prototyped, pass it in FR regs then GR regs.
2902 If not prototyped, pass it in both FR and GR regs.
2904 If this is an SFmode aggregate, then it is possible to run out of
2905 FR regs while GR regs are still left. In that case, we pass the
2906 remaining part in the GR regs. */
2908 /* Fill the FP regs. We do this always. We stop if we reach the end
2909 of the argument, the last FP register, or the last argument slot. */
2911 byte_size = ((mode == BLKmode)
2912 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2913 args_byte_size = int_regs * UNITS_PER_WORD;
2915 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2916 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2918 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2919 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2923 args_byte_size += hfa_size;
2927 /* If no prototype, then the whole thing must go in GR regs. */
2928 if (! cum->prototype)
2930 /* If this is an SFmode aggregate, then we might have some left over
2931 that needs to go in GR regs. */
2932 else if (byte_size != offset)
2933 int_regs += offset / UNITS_PER_WORD;
2935 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2937 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2939 enum machine_mode gr_mode = DImode;
2941 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2942 then this goes in a GR reg left adjusted/little endian, right
2943 adjusted/big endian. */
2944 /* ??? Currently this is handled wrong, because 4-byte hunks are
2945 always right adjusted/little endian. */
2948 /* If we have an even 4 byte hunk because the aggregate is a
2949 multiple of 4 bytes in size, then this goes in a GR reg right
2950 adjusted/little endian. */
2951 else if (byte_size - offset == 4)
2953 /* Complex floats need to have float mode. */
2954 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2957 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2958 gen_rtx_REG (gr_mode, (basereg
2961 offset += GET_MODE_SIZE (gr_mode);
2962 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
2963 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
2966 /* If we ended up using just one location, just return that one loc. */
2968 return XEXP (loc[0], 0);
2970 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2973 /* Integral types and aggregates go in general registers.  If we have run out of
2974 FR registers, then FP values must also go in general registers. This can
2975 happen when we have a SFmode HFA. */
2976 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
2977 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
2978 return gen_rtx_REG (mode, basereg + cum->words + offset);
2980 /* If there is a prototype, then FP values go in a FR register when
2981 named, and in a GR register when unnamed.  */
2982 else if (cum->prototype)
2985 return gen_rtx_REG (mode, basereg + cum->words + offset);
2987 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2989 /* If there is no prototype, then FP values go in both FR and GR
registers.  */
2993 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2994 gen_rtx_REG (mode, (FR_ARG_FIRST
2997 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2999 (basereg + cum->words
3003 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
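/* For example (a sketch based on the comment above, not original text):
   an unprototyped double in the first argument slot is emitted in both
   the first FR argument register and the first GR argument slot, so the
   callee can read it from either register bank without knowing the
   caller's prototype.  */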
3007 /* Return number of words, at the beginning of the argument, that must be
3008 put in registers.  0 if the argument is entirely in registers or entirely
on the stack.  */
3012 ia64_function_arg_partial_nregs (cum, mode, type, named)
3013 CUMULATIVE_ARGS *cum;
3014 enum machine_mode mode;
3016 int named ATTRIBUTE_UNUSED;
3018 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3019 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3023 /* Arguments with alignment larger than 8 bytes start at the next even
boundary.  */
3025 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3027 && (cum->words & 1))
3030 /* If all argument slots are used, then it must go on the stack. */
3031 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3034 /* It doesn't matter whether the argument goes in FR or GR regs. If
3035 it fits within the 8 argument slots, then it goes entirely in
3036 registers. If it extends past the last argument slot, then the rest
3037 goes on the stack. */
3039 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3042 return MAX_ARGUMENT_SLOTS - cum->words - offset;
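/* Worked example (hypothetical, not from the original source): with the
   8 argument slots mentioned above, a 4-word argument starting at slot 6
   (cum->words == 6, offset == 0) has 8 - 6 = 2 words passed in registers
   and the remaining 2 words passed on the stack.  */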
3045 /* Update CUM to point after this argument. This is patterned after
3046 ia64_function_arg. */
3049 ia64_function_arg_advance (cum, mode, type, named)
3050 CUMULATIVE_ARGS *cum;
3051 enum machine_mode mode;
3055 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3056 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3059 enum machine_mode hfa_mode = VOIDmode;
3061 /* If all arg slots are already full, then there is nothing to do. */
3062 if (cum->words >= MAX_ARGUMENT_SLOTS)
3065 /* Arguments with alignment larger than 8 bytes start at the next even
boundary.  */
3067 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3069 && (cum->words & 1))
3072 cum->words += words + offset;
3074 /* Check for and handle homogeneous FP aggregates. */
3076 hfa_mode = hfa_element_mode (type, 0);
3078 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3079 and unprototyped hfas are passed specially. */
3080 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3082 int fp_regs = cum->fp_regs;
3083 /* This is the original value of cum->words + offset. */
3084 int int_regs = cum->words - words;
3085 int hfa_size = GET_MODE_SIZE (hfa_mode);
3089 /* If prototyped, pass it in FR regs then GR regs.
3090 If not prototyped, pass it in both FR and GR regs.
3092 If this is an SFmode aggregate, then it is possible to run out of
3093 FR regs while GR regs are still left. In that case, we pass the
3094 remaining part in the GR regs. */
3096 /* Fill the FP regs. We do this always. We stop if we reach the end
3097 of the argument, the last FP register, or the last argument slot. */
3099 byte_size = ((mode == BLKmode)
3100 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3101 args_byte_size = int_regs * UNITS_PER_WORD;
3103 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3104 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3107 args_byte_size += hfa_size;
3111 cum->fp_regs = fp_regs;
3114 /* Integral types and aggregates go in general registers.  If we have run out of
3115 FR registers, then FP values must also go in general registers. This can
3116 happen when we have a SFmode HFA. */
3117 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3120 /* If there is a prototype, then FP values go in a FR register when
3121 named, and in a GR register when unnamed.  */
3122 else if (cum->prototype)
3127 /* ??? Complex types should not reach here. */
3128 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3130 /* If there is no prototype, then FP values go in both FR and GR
registers.  */
3133 /* ??? Complex types should not reach here. */
3134 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3139 /* Implement va_start. */
3142 ia64_va_start (stdarg_p, valist, nextarg)
3150 arg_words = current_function_args_info.words;
3155 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3157 nextarg = plus_constant (nextarg, ofs);
3158 std_expand_builtin_va_start (1, valist, nextarg);
3161 /* Implement va_arg. */
3164 ia64_va_arg (valist, type)
3169 /* Arguments with alignment larger than 8 bytes start at the next even
boundary.  */
3171 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3173 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3174 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3175 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3176 build_int_2 (-2 * UNITS_PER_WORD, -1));
3177 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3178 TREE_SIDE_EFFECTS (t) = 1;
3179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
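/* The tree built above is equivalent to

     valist = (valist + 15) & -16;

   i.e. it rounds the va_list pointer up to the next 16-byte
   (2 * UNITS_PER_WORD) boundary before the standard va_arg code runs.  */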
3182 return std_expand_builtin_va_arg (valist, type);
3185 /* Return 1 if the function's return value is returned in memory.  Return 0
if it is returned in a register.  */
3189 ia64_return_in_memory (valtype)
3192 enum machine_mode mode;
3193 enum machine_mode hfa_mode;
3196 mode = TYPE_MODE (valtype);
3197 byte_size = ((mode == BLKmode)
3198 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3200 /* HFAs with up to 8 elements are returned in the FP argument registers.  */
3202 hfa_mode = hfa_element_mode (valtype, 0);
3203 if (hfa_mode != VOIDmode)
3205 int hfa_size = GET_MODE_SIZE (hfa_mode);
3207 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3213 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3219 /* Return rtx for register that holds the function return value. */
3222 ia64_function_value (valtype, func)
3224 tree func ATTRIBUTE_UNUSED;
3226 enum machine_mode mode;
3227 enum machine_mode hfa_mode;
3229 mode = TYPE_MODE (valtype);
3230 hfa_mode = hfa_element_mode (valtype, 0);
3232 if (hfa_mode != VOIDmode)
3240 hfa_size = GET_MODE_SIZE (hfa_mode);
3241 byte_size = ((mode == BLKmode)
3242 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3244 for (i = 0; offset < byte_size; i++)
3246 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3247 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3253 return XEXP (loc[0], 0);
3255 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3257 else if (FLOAT_TYPE_P (valtype)
3258          && ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3259 return gen_rtx_REG (mode, FR_ARG_FIRST);
3261 return gen_rtx_REG (mode, GR_RET_FIRST);
3264 /* Print a memory address as an operand to reference that memory location. */
3266 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3267 also call this from ia64_print_operand for memory addresses. */
3270 ia64_print_operand_address (stream, address)
3271 FILE * stream ATTRIBUTE_UNUSED;
3272 rtx address ATTRIBUTE_UNUSED;
3276 /* Print an operand to an assembler instruction.
3277 C Swap and print a comparison operator.
3278 D Print an FP comparison operator.
3279 E Print 32 - constant, for SImode shifts as extract.
3280 e Print 64 - constant, for DImode rotates.
3281 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3282 a floating point register emitted normally.
3283 I Invert a predicate register by adding 1.
3284 J Select the proper predicate register for a condition.
3285 j Select the inverse predicate register for a condition.
3286 O Append .acq for volatile load.
3287 P Postincrement of a MEM.
3288 Q Append .rel for volatile store.
3289 S Shift amount for shladd instruction.
3290 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3291 for Intel assembler.
3292 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3293 for Intel assembler.
3294 r Print register name, or constant 0 as r0.  HP compatibility for
Linux kernel.  */
3297 ia64_print_operand (file, x, code)
3307 /* Handled below. */
3312 enum rtx_code c = swap_condition (GET_CODE (x));
3313 fputs (GET_RTX_NAME (c), file);
3318 switch (GET_CODE (x))
3330 str = GET_RTX_NAME (GET_CODE (x));
3337 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3345 if (x == CONST0_RTX (GET_MODE (x)))
3346 str = reg_names [FR_REG (0)];
3347 else if (x == CONST1_RTX (GET_MODE (x)))
3348 str = reg_names [FR_REG (1)];
3349 else if (GET_CODE (x) == REG)
3350 str = reg_names [REGNO (x)];
3357 fputs (reg_names [REGNO (x) + 1], file);
3363 unsigned int regno = REGNO (XEXP (x, 0));
3364 if (GET_CODE (x) == EQ)
3368 fputs (reg_names [regno], file);
3373 if (MEM_VOLATILE_P (x))
3374 fputs (".acq", file);
3379 HOST_WIDE_INT value;
3381 switch (GET_CODE (XEXP (x, 0)))
3387 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3388 if (GET_CODE (x) == CONST_INT)
3390 else if (GET_CODE (x) == REG)
3392 fprintf (file, ", %s", reg_names[REGNO (x)]);
3400 value = GET_MODE_SIZE (GET_MODE (x));
3404 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3410 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3415 if (MEM_VOLATILE_P (x))
3416 fputs (".rel", file);
3420 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3424 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3426 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3432 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3434 const char *prefix = "0x";
3435 if (INTVAL (x) & 0x80000000)
3437 fprintf (file, "0xffffffff");
3440 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3446 /* If this operand is the constant zero, write it as register zero.
3447 Any register, zero, or CONST_INT value is OK here. */
3448 if (GET_CODE (x) == REG)
3449 fputs (reg_names[REGNO (x)], file);
3450 else if (x == CONST0_RTX (GET_MODE (x)))
3452 else if (GET_CODE (x) == CONST_INT)
3453 output_addr_const (file, x);
3455 output_operand_lossage ("invalid %%r value");
3462 /* For conditional branches, returns or calls, substitute
3463 sptk, dptk, dpnt, or spnt for %s. */
3464 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3467 int pred_val = INTVAL (XEXP (x, 0));
3469 /* Guess top and bottom 2% statically predicted.  */
3470 if (pred_val < REG_BR_PROB_BASE / 50)
3472 else if (pred_val < REG_BR_PROB_BASE / 2)
3474 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3479 else if (GET_CODE (current_output_insn) == CALL_INSN)
3484 fputs (which, file);
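/* Sketch of the mapping implied by the thresholds above (hint names
   from the comment at the top of this case; an illustration, not
   original text):

     pred_val <  2% of REG_BR_PROB_BASE  -> .spnt  (static not-taken)
     pred_val < 50%                      -> .dpnt  (dynamic not-taken)
     pred_val < 98%                      -> .dptk  (dynamic taken)
     otherwise                           -> .sptk  (static taken)  */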
3489 x = current_insn_predicate;
3492 unsigned int regno = REGNO (XEXP (x, 0));
3493 if (GET_CODE (x) == EQ)
3495 fprintf (file, "(%s) ", reg_names [regno]);
3500 output_operand_lossage ("ia64_print_operand: unknown code");
3504 switch (GET_CODE (x))
3506 /* This happens for the spill/restore instructions. */
3511 /* ... fall through ... */
3514 fputs (reg_names [REGNO (x)], file);
3519 rtx addr = XEXP (x, 0);
3520 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3521 addr = XEXP (addr, 0);
3522 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3527 output_addr_const (file, x);
3534 /* Calculate the cost of moving data from a register in class FROM to
a register in class TO.  */
3538 ia64_register_move_cost (from, to)
3539 enum reg_class from, to;
3541 int from_hard, to_hard;
3546 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3547 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3548 from_gr = (from == GENERAL_REGS);
3549 to_gr = (to == GENERAL_REGS);
3550 from_fr = (from == FR_REGS);
3551 to_fr = (to == FR_REGS);
3552 from_pr = (from == PR_REGS);
3553 to_pr = (to == PR_REGS);
3555 if (from_hard && to_hard)
3557 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3560 /* Moving between PR registers takes two insns. */
3561 else if (from_pr && to_pr)
3563 /* Moving between PR and anything but GR is impossible. */
3564 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3567 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3568 secondary memory reloads for TFmode moves. Unfortunately, we don't
3569 have the mode here, so we can't check that. */
3570 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3571 to avoid spectacularly poor register class preferencing for TFmode. */
3572 else if (from_fr != to_fr)
3578 /* This function returns the register class required for a secondary
3579 register when copying between one of the registers in CLASS, and X,
3580 using MODE.  A return value of NO_REGS means that no secondary register
is required.  */
3584 ia64_secondary_reload_class (class, mode, x)
3585 enum reg_class class;
3586 enum machine_mode mode ATTRIBUTE_UNUSED;
3591 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3592 regno = true_regnum (x);
3597 /* ??? This is required because of a bad gcse/cse/global interaction.
3598 We end up with two pseudos with overlapping lifetimes both of which
3599 are equiv to the same constant, and both which need to be in BR_REGS.
3600 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3601 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3602 This seems to be a cse bug. cse_basic_block_end changes depending
3603 on the path length, which means the qty_first_reg check in
3604 make_regs_eqv can give different answers at different times. */
3605 /* ??? At some point I'll probably need a reload_indi pattern to handle
this.  */
3607 if (BR_REGNO_P (regno))
3610 /* This is needed if a pseudo used as a call_operand gets spilled to a
stack slot.  */
3612 if (GET_CODE (x) == MEM)
3617 /* This can happen when a paradoxical subreg is an operand to the
muldi3 pattern.  */
3619 /* ??? This shouldn't be necessary after instruction scheduling is
3620 enabled, because paradoxical subregs are not accepted by
3621 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3622 stop the paradoxical subreg stupidity in the *_operand functions
in recog.c.  */
3624 if (GET_CODE (x) == MEM
3625 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3626 || GET_MODE (x) == QImode))
3629 /* This can happen because of the ior/and/etc patterns that accept FP
3630 registers as operands. If the third operand is a constant, then it
3631 needs to be reloaded into a FP register. */
3632 if (GET_CODE (x) == CONST_INT)
3635 /* This can happen because of register elimination in a muldi3 insn.
3636 E.g. `26107 * (unsigned long)&u'. */
3637 if (GET_CODE (x) == PLUS)
3642 /* ??? This happens if we cse/gcse a BImode value across a call,
3643 and the function has a nonlocal goto. This is because global
3644 does not allocate call crossing pseudos to hard registers when
3645 current_function_has_nonlocal_goto is true. This is relatively
3646 common for C++ programs that use exceptions. To reproduce,
3647 return NO_REGS and compile libstdc++. */
3648 if (GET_CODE (x) == MEM)
3651 /* This can happen when we take a BImode subreg of a DImode value,
3652 and that DImode value winds up in some non-GR register. */
3653 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3658 /* Since we have no offsettable memory addresses, we need a temporary
3659 to hold the address of the second word. */
3672 /* Emit text to declare externally defined variables and functions, because
3673 the Intel assembler does not support undefined externals. */
3676 ia64_asm_output_external (file, decl, name)
3681 int save_referenced;
3683 /* GNU as does not need anything here. */
3687 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3688 the linker when we do this, so we need to be careful not to do this for
3689 builtin functions which have no library equivalent. Unfortunately, we
3690 can't tell here whether or not a function will actually be called by
3691 expand_expr, so we pull in library functions even if we may not need
them.  */
3693 if (! strcmp (name, "__builtin_next_arg")
3694 || ! strcmp (name, "alloca")
3695 || ! strcmp (name, "__builtin_constant_p")
3696 || ! strcmp (name, "__builtin_args_info"))
3699 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
restore it.  */
3701 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3702 if (TREE_CODE (decl) == FUNCTION_DECL)
3704 fprintf (file, "%s", TYPE_ASM_OP);
3705 assemble_name (file, name);
3707 fprintf (file, TYPE_OPERAND_FMT, "function");
3710 ASM_GLOBALIZE_LABEL (file, name);
3711 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3714 /* Parse the -mfixed-range= option string. */
3717 fix_range (const_str)
3718 const char *const_str;
3721 char *str, *dash, *comma;
3723 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3724 REG2 are either register names or register numbers. The effect
3725 of this option is to mark the registers in the range from REG1 to
3726 REG2 as ``fixed'' so they won't be used by the compiler. This is
3727 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3729 i = strlen (const_str);
3730 str = (char *) alloca (i + 1);
3731 memcpy (str, const_str, i + 1);
3735 dash = strchr (str, '-');
3738 warning ("value of -mfixed-range must have form REG1-REG2");
3743 comma = strchr (dash + 1, ',');
3747 first = decode_reg_name (str);
3750 warning ("unknown register name: %s", str);
3754 last = decode_reg_name (dash + 1);
3757 warning ("unknown register name: %s", dash + 1);
3765 warning ("%s-%s is an empty range", str, dash + 1);
3769 for (i = first; i <= last; ++i)
3770 fixed_regs[i] = call_used_regs[i] = 1;
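/* Example usage, per the comment above: -mfixed-range=f32-f127 marks
   f32 through f127 as fixed and call-used, so the compiler never
   allocates them -- e.g. for kernel code that must not touch the high
   FP partition.  */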
3780 /* Called to register all of our global variables with the garbage
collector.  */
3784 ia64_add_gc_roots ()
3786 ggc_add_rtx_root (&ia64_compare_op0, 1);
3787 ggc_add_rtx_root (&ia64_compare_op1, 1);
3791 ia64_init_machine_status (p)
3795 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3799 ia64_mark_machine_status (p)
3802 struct machine_function *machine = p->machine;
3806 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3807 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3808 ggc_mark_rtx (machine->ia64_gp_save);
3813 ia64_free_machine_status (p)
3820 /* Handle TARGET_OPTIONS switches. */
3823 ia64_override_options ()
3825 if (TARGET_AUTO_PIC)
3826 target_flags |= MASK_CONST_GP;
3828 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3830 warning ("cannot optimize division for both latency and throughput");
3831 target_flags &= ~MASK_INLINE_DIV_THR;
3834 if (ia64_fixed_range_string)
3835 fix_range (ia64_fixed_range_string);
3837 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3838 flag_schedule_insns_after_reload = 0;
3840 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3842 init_machine_status = ia64_init_machine_status;
3843 mark_machine_status = ia64_mark_machine_status;
3844 free_machine_status = ia64_free_machine_status;
3846 ia64_add_gc_roots ();
3849 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
3850 static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
3851 static enum attr_type ia64_safe_type PARAMS ((rtx));
3853 static enum attr_itanium_requires_unit0
3854 ia64_safe_itanium_requires_unit0 (insn)
3857 if (recog_memoized (insn) >= 0)
3858 return get_attr_itanium_requires_unit0 (insn);
3860 return ITANIUM_REQUIRES_UNIT0_NO;
3863 static enum attr_itanium_class
3864 ia64_safe_itanium_class (insn)
3867 if (recog_memoized (insn) >= 0)
3868 return get_attr_itanium_class (insn);
3870 return ITANIUM_CLASS_UNKNOWN;
3873 static enum attr_type
3874 ia64_safe_type (insn)
3877 if (recog_memoized (insn) >= 0)
3878 return get_attr_type (insn);
3880 return TYPE_UNKNOWN;
3883 /* The following collection of routines emit instruction group stop bits as
3884 necessary to avoid dependencies. */
3886 /* Need to track some additional registers as far as serialization is
3887 concerned so we can properly handle br.call and br.ret. We could
3888 make these registers visible to gcc, but since these registers are
3889 never explicitly used in gcc generated code, it seems wasteful to
3890 do so (plus it would make the call and return patterns needlessly
complex).  */
3892 #define REG_GP (GR_REG (1))
3893 #define REG_RP (BR_REG (0))
3894 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3895 /* This is used for volatile asms which may require a stop bit immediately
3896 before and after them. */
3897 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3898 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3899 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
3901 /* For each register, we keep track of how it has been written in the
3902 current instruction group.
3904 If a register is written unconditionally (no qualifying predicate),
3905 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3907 If a register is written if its qualifying predicate P is true, we
3908 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3909 may be written again by the complement of P (P^1) and when this happens,
3910 WRITE_COUNT gets set to 2.
3912 The result of this is that whenever an insn attempts to write a register
3913 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3915 If a predicate register is written by a floating-point insn, we set
3916 WRITTEN_BY_FP to true.
3918 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3919 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
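/* Illustrative example (not from the original source): after

     (p6) mov r14 = r15

   rws_sum for r14 has WRITE_COUNT == 1 and FIRST_PRED == p6.  A second
   write under the complementary predicate,

     (p7) mov r14 = r16

   raises WRITE_COUNT to 2 without requiring a stop bit; any further
   write to r14 in the same group then forces a barrier.  */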
3921 struct reg_write_state
3923 unsigned int write_count : 2;
3924 unsigned int first_pred : 16;
3925 unsigned int written_by_fp : 1;
3926 unsigned int written_by_and : 1;
3927 unsigned int written_by_or : 1;
3930 /* Cumulative info for the current instruction group. */
3931 struct reg_write_state rws_sum[NUM_REGS];
3932 /* Info for the current instruction. This gets copied to rws_sum after a
3933 stop bit is emitted. */
3934 struct reg_write_state rws_insn[NUM_REGS];
3936 /* Indicates whether this is the first instruction after a stop bit,
3937 in which case we don't need another stop bit. Without this, we hit
3938 the abort in ia64_variable_issue when scheduling an alloc. */
3939 static int first_instruction;
3941 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3942 RTL for one instruction. */
3945 unsigned int is_write : 1; /* Is register being written? */
3946 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3947 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3948 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3949 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3950 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
3953 static void rws_update PARAMS ((struct reg_write_state *, int,
3954 struct reg_flags, int));
3955 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3956 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3957 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3958 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3959 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3960 static void init_insn_group_barriers PARAMS ((void));
3961 static int group_barrier_needed_p PARAMS ((rtx));
3962 static int safe_group_barrier_needed_p PARAMS ((rtx));
3964 /* Update *RWS for REGNO, which is being written by the current instruction,
3965 with predicate PRED, and associated register flags in FLAGS. */
3968 rws_update (rws, regno, flags, pred)
3969 struct reg_write_state *rws;
3971 struct reg_flags flags;
3974 rws[regno].write_count += pred ? 1 : 2;
3975 rws[regno].written_by_fp |= flags.is_fp;
3976 /* ??? Not tracking and/or across differing predicates. */
3977 rws[regno].written_by_and = flags.is_and;
3978 rws[regno].written_by_or = flags.is_or;
3979 rws[regno].first_pred = pred;
3982 /* Handle an access to register REGNO of type FLAGS using predicate register
3983 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3984 a dependency with an earlier instruction in the same group. */
3987 rws_access_regno (regno, flags, pred)
3989 struct reg_flags flags;
3992 int need_barrier = 0;
3994 if (regno >= NUM_REGS)
3997 if (! PR_REGNO_P (regno))
3998 flags.is_and = flags.is_or = 0;
4004 /* One insn writes same reg multiple times? */
4005 if (rws_insn[regno].write_count > 0)
4008 /* Update info for current instruction. */
4009 rws_update (rws_insn, regno, flags, pred);
4010 write_count = rws_sum[regno].write_count;
4012 switch (write_count)
4015 /* The register has not been written yet. */
4016 rws_update (rws_sum, regno, flags, pred);
4020 /* The register has been written via a predicate. If this is
4021 not a complementary predicate, then we need a barrier. */
4022 /* ??? This assumes that P and P+1 are always complementary
4023 predicates for P even. */
4024 if (flags.is_and && rws_sum[regno].written_by_and)
4026 else if (flags.is_or && rws_sum[regno].written_by_or)
4028 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4030 rws_update (rws_sum, regno, flags, pred);
4034 /* The register has been unconditionally written already.  We
need a barrier.  */
4036 if (flags.is_and && rws_sum[regno].written_by_and)
4038 else if (flags.is_or && rws_sum[regno].written_by_or)
4042 rws_sum[regno].written_by_and = flags.is_and;
4043 rws_sum[regno].written_by_or = flags.is_or;
4052 if (flags.is_branch)
4054 /* Branches have several RAW exceptions that allow us to avoid
barriers.  */
4057 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4058 /* RAW dependencies on branch regs are permissible as long
4059 as the writer is a non-branch instruction. Since we
4060 never generate code that uses a branch register written
4061 by a branch instruction, handling this case is
4065 if (REGNO_REG_CLASS (regno) == PR_REGS
4066 && ! rws_sum[regno].written_by_fp)
4067 /* The predicates of a branch are available within the
4068 same insn group as long as the predicate was written by
4069 something other than a floating-point instruction. */
4073 if (flags.is_and && rws_sum[regno].written_by_and)
4075 if (flags.is_or && rws_sum[regno].written_by_or)
4078 switch (rws_sum[regno].write_count)
4081 /* The register has not been written yet. */
4085 /* The register has been written via a predicate. If this is
4086 not a complementary predicate, then we need a barrier. */
4087 /* ??? This assumes that P and P+1 are always complementary
4088 predicates for P even. */
4089 if ((rws_sum[regno].first_pred ^ 1) != pred)
4094 /* The register has been unconditionally written already.  We
need a barrier.  */
4104 return need_barrier;
4108 rws_access_reg (reg, flags, pred)
4110 struct reg_flags flags;
4113 int regno = REGNO (reg);
4114 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4117 return rws_access_regno (regno, flags, pred);
4120 int need_barrier = 0;
4122 need_barrier |= rws_access_regno (regno + n, flags, pred);
4123 return need_barrier;
4127 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4128 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4131 update_set_flags (x, pflags, ppred, pcond)
4133 struct reg_flags *pflags;
4137 rtx src = SET_SRC (x);
4141 switch (GET_CODE (src))
4147 if (SET_DEST (x) == pc_rtx)
4148 /* X is a conditional branch. */
4152 int is_complemented = 0;
4154 /* X is a conditional move. */
4155 rtx cond = XEXP (src, 0);
4156 if (GET_CODE (cond) == EQ)
4157 is_complemented = 1;
4158 cond = XEXP (cond, 0);
4159 if (GET_CODE (cond) != REG
4160 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4163 if (XEXP (src, 1) == SET_DEST (x)
4164 || XEXP (src, 2) == SET_DEST (x))
4166 /* X is a conditional move that conditionally writes the
4169 /* We need another complement in this case. */
4170 if (XEXP (src, 1) == SET_DEST (x))
4171 is_complemented = ! is_complemented;
4173 *ppred = REGNO (cond);
4174 if (is_complemented)
4178 /* ??? If this is a conditional write to the dest, then this
4179 instruction does not actually read one source. This probably
4180 doesn't matter, because that source is also the dest. */
4181 /* ??? Multiple writes to predicate registers are allowed
4182 if they are all AND type compares, or if they are all OR
4183 type compares. We do not generate such instructions
4186 /* ... fall through ... */
4189 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4190 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4191 /* Set pflags->is_fp to 1 so that we know we're dealing
4192 with a floating point comparison when processing the
4193 destination of the SET. */
4196 /* Discover if this is a parallel comparison. We only handle
4197 and.orcm and or.andcm at present, since we must retain a
4198 strict inverse on the predicate pair. */
4199 else if (GET_CODE (src) == AND)
4201 else if (GET_CODE (src) == IOR)
4208 /* Subroutine of rtx_needs_barrier; this function determines whether the
4209 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4210 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4214 set_src_needs_barrier (x, flags, pred, cond)
4216 struct reg_flags flags;
4220 int need_barrier = 0;
4222 rtx src = SET_SRC (x);
4224 if (GET_CODE (src) == CALL)
4225 /* We don't need to worry about the result registers that
4226 get written by a subroutine call.  */
4227 return rtx_needs_barrier (src, flags, pred);
4228 else if (SET_DEST (x) == pc_rtx)
4230 /* X is a conditional branch. */
4231 /* ??? This seems redundant, as the caller sets this bit for
all JUMP_INSNs.  */
4233 flags.is_branch = 1;
4234 return rtx_needs_barrier (src, flags, pred);
4237 need_barrier = rtx_needs_barrier (src, flags, pred);
4239 /* This instruction unconditionally uses a predicate register. */
4241 need_barrier |= rws_access_reg (cond, flags, 0);
4244 if (GET_CODE (dst) == ZERO_EXTRACT)
4246 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4247 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4248 dst = XEXP (dst, 0);
4250 return need_barrier;
4253 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4254 Return 1 if this access creates a dependency with an earlier instruction
4255 in the same group. */
4258 rtx_needs_barrier (x, flags, pred)
4260 struct reg_flags flags;
4264 int is_complemented = 0;
4265 int need_barrier = 0;
4266 const char *format_ptr;
4267 struct reg_flags new_flags;
4275 switch (GET_CODE (x))
4278 update_set_flags (x, &new_flags, &pred, &cond);
4279 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4280 if (GET_CODE (SET_SRC (x)) != CALL)
4282 new_flags.is_write = 1;
4283 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4288 new_flags.is_write = 0;
4289 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4291 /* Avoid multiple register writes, in case this is a pattern with
4292 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4293 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4295 new_flags.is_write = 1;
4296 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4297 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4298 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4303 /* X is a predicated instruction. */
4305 cond = COND_EXEC_TEST (x);
4308 need_barrier = rtx_needs_barrier (cond, flags, 0);
4310 if (GET_CODE (cond) == EQ)
4311 is_complemented = 1;
4312 cond = XEXP (cond, 0);
4313 if (GET_CODE (cond) != REG
4314 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4316 pred = REGNO (cond);
4317 if (is_complemented)
4320 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4321 return need_barrier;
4325 /* Clobber & use are for earlier compiler-phases only. */
4330 /* We always emit stop bits for traditional asms. We emit stop bits
4331 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4332 if (GET_CODE (x) != ASM_OPERANDS
4333 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4335 /* Avoid writing the register multiple times if we have multiple
4336 asm outputs. This avoids an abort in rws_access_reg. */
4337 if (! rws_insn[REG_VOLATILE].write_count)
4339 new_flags.is_write = 1;
4340 rws_access_regno (REG_VOLATILE, new_flags, pred);
4345 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4346 We cannot just fall through here, since then we would be confused
4347 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
4348 usage, does not indicate a traditional asm.  */
4350 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4351 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4356 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4358 rtx pat = XVECEXP (x, 0, i);
4359 if (GET_CODE (pat) == SET)
4361 update_set_flags (pat, &new_flags, &pred, &cond);
4362 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4364 else if (GET_CODE (pat) == USE
4365 || GET_CODE (pat) == CALL
4366 || GET_CODE (pat) == ASM_OPERANDS)
4367 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4368 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4371 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4373 rtx pat = XVECEXP (x, 0, i);
4374 if (GET_CODE (pat) == SET)
4376 if (GET_CODE (SET_SRC (pat)) != CALL)
4378 new_flags.is_write = 1;
4379 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4383 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4384 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4392 if (REGNO (x) == AR_UNAT_REGNUM)
4394 for (i = 0; i < 64; ++i)
4395 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4397 else
4398 need_barrier = rws_access_reg (x, flags, pred);
4402 /* Find the regs used in memory address computation. */
4403 new_flags.is_write = 0;
4404 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4407 case CONST_INT: case CONST_DOUBLE:
4408 case SYMBOL_REF: case LABEL_REF: case CONST:
4411 /* Operators with side-effects. */
4412 case POST_INC: case POST_DEC:
4413 if (GET_CODE (XEXP (x, 0)) != REG)
4414 abort ();
4416 new_flags.is_write = 0;
4417 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4418 new_flags.is_write = 1;
4419 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4420 break;
4422 case POST_MODIFY:
4423 if (GET_CODE (XEXP (x, 0)) != REG)
4426 new_flags.is_write = 0;
4427 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4428 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4429 new_flags.is_write = 1;
4430 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4433 /* Handle common unary and binary ops for efficiency. */
4434 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4435 case MOD: case UDIV: case UMOD: case AND: case IOR:
4436 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4437 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4438 case NE: case EQ: case GE: case GT: case LE:
4439 case LT: case GEU: case GTU: case LEU: case LTU:
4440 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4441 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4444 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4445 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4446 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4447 case SQRT: case FFS:
4448 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4452 switch (XINT (x, 1))
4454 case 1: /* st8.spill */
4455 case 2: /* ld8.fill */
4457 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4458 HOST_WIDE_INT bit = (offset >> 3) & 63;
4460 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4461 new_flags.is_write = (XINT (x, 1) == 1);
4462 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4463 new_flags, pred);
4464 break;
4467 case 3: /* stf.spill */
4468 case 4: /* ldf.spill */
4469 case 8: /* popcnt */
4470 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4473 case 7: /* pred_rel_mutex */
4474 case 9: /* pic call */
4476 case 19: /* fetchadd_acq */
4477 case 20: /* mov = ar.bsp */
4478 case 21: /* flushrs */
4479 case 22: /* bundle selector */
4480 case 23: /* cycle display */
4483 case 24: /* addp4 */
4484 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4487 case 5: /* recip_approx */
4488 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4489 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4492 case 13: /* cmpxchg_acq */
4493 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4494 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4502 case UNSPEC_VOLATILE:
4503 switch (XINT (x, 1))
4505 case 0: /* alloc */
4506 /* Alloc must always be the first instruction of a group.
4507 We force this by always returning true. */
4508 /* ??? We might get better scheduling if we explicitly check for
4509 input/local/output register dependencies, and modify the
4510 scheduler so that alloc is always reordered to the start of
4511 the current group. We could then eliminate all of the
4512 first_instruction code. */
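/* An alloc insn looks like, e.g. (illustrative),

	alloc r35 = ar.pfs, 2, 3, 4, 0

   It reads ar.pfs and writes the CFM, which is why the code below
   records exactly those two accesses. */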
4513 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4515 new_flags.is_write = 1;
4516 rws_access_regno (REG_AR_CFM, new_flags, pred);
4519 case 1: /* blockage */
4520 case 2: /* insn group barrier */
4523 case 5: /* set_bsp */
4527 case 7: /* pred.rel.mutex */
4528 case 8: /* safe_across_calls all */
4529 case 9: /* safe_across_calls normal */
4538 new_flags.is_write = 0;
4539 need_barrier = rws_access_regno (REG_RP, flags, pred);
4540 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4542 new_flags.is_write = 1;
4543 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4544 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4548 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4549 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4550 switch (format_ptr[i])
4552 case '0': /* unused field */
4553 case 'i': /* integer */
4554 case 'n': /* note */
4555 case 'w': /* wide integer */
4556 case 's': /* pointer to string */
4557 case 'S': /* optional pointer to string */
4561 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4566 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4567 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4576 return need_barrier;
4579 /* Clear out the state for group_barrier_needed_p at the start of a
4580 sequence of insns. */
4583 init_insn_group_barriers ()
4585 memset (rws_sum, 0, sizeof (rws_sum));
4586 first_instruction = 1;
4589 /* Given the current state, recorded by previous calls to this function,
4590 determine whether a group barrier (a stop bit) is necessary before INSN.
4591 Return nonzero if so. */
4594 group_barrier_needed_p (insn)
4598 int need_barrier = 0;
4599 struct reg_flags flags;
4601 memset (&flags, 0, sizeof (flags));
4602 switch (GET_CODE (insn))
4608 /* A barrier doesn't imply an instruction group boundary. */
4612 memset (rws_insn, 0, sizeof (rws_insn));
4616 flags.is_branch = 1;
4617 flags.is_sibcall = SIBLING_CALL_P (insn);
4618 memset (rws_insn, 0, sizeof (rws_insn));
4620 /* Don't bundle a call following another call. */
4621 if ((pat = prev_active_insn (insn))
4622 && GET_CODE (pat) == CALL_INSN)
4628 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4632 flags.is_branch = 1;
4634 /* Don't bundle a jump following a call. */
4635 if ((pat = prev_active_insn (insn))
4636 && GET_CODE (pat) == CALL_INSN)
4644 if (GET_CODE (PATTERN (insn)) == USE
4645 || GET_CODE (PATTERN (insn)) == CLOBBER)
4646 /* Don't care about USE and CLOBBER "insns"---those are used to
4647 indicate to the optimizer that it shouldn't get rid of
4648 certain operations. */
4651 pat = PATTERN (insn);
4653 /* Ug. Hack hacks hacked elsewhere. */
4654 switch (recog_memoized (insn))
4656 /* We play dependency tricks with the epilogue in order
4657 to get proper schedules. Undo this for dv analysis. */
4658 case CODE_FOR_epilogue_deallocate_stack:
4659 pat = XVECEXP (pat, 0, 0);
4662 /* The pattern we use for br.cloop confuses the code above.
4663 The second element of the vector is representative. */
4664 case CODE_FOR_doloop_end_internal:
4665 pat = XVECEXP (pat, 0, 1);
4668 /* Doesn't generate code. */
4669 case CODE_FOR_pred_rel_mutex:
4676 memset (rws_insn, 0, sizeof (rws_insn));
4677 need_barrier = rtx_needs_barrier (pat, flags, 0);
4679 /* Check to see if the previous instruction was a volatile
4680 asm. */
4681 if (! need_barrier)
4682 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4689 if (first_instruction)
4692 first_instruction = 0;
4695 return need_barrier;
4698 /* Like group_barrier_needed_p, but do not clobber the current state. */
4701 safe_group_barrier_needed_p (insn)
4704 struct reg_write_state rws_saved[NUM_REGS];
4705 int saved_first_instruction;
4708 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4709 saved_first_instruction = first_instruction;
4711 t = group_barrier_needed_p (insn);
4713 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4714 first_instruction = saved_first_instruction;
4719 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4720 as necessary to eliminate dependencies. This function assumes that
4721 a final instruction scheduling pass has been run which has already
4722 inserted most of the necessary stop bits. This function only inserts
4723 new ones at basic block boundaries, since these are invisible to the
4724 scheduler. */
4727 emit_insn_group_barriers (dump, insns)
4733 int insns_since_last_label = 0;
4735 init_insn_group_barriers ();
4737 for (insn = insns; insn; insn = NEXT_INSN (insn))
4739 if (GET_CODE (insn) == CODE_LABEL)
4741 if (insns_since_last_label)
4743 insns_since_last_label = 0;
4745 else if (GET_CODE (insn) == NOTE
4746 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4748 if (insns_since_last_label)
4750 insns_since_last_label = 0;
4752 else if (GET_CODE (insn) == INSN
4753 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4754 && XINT (PATTERN (insn), 1) == 2)
4756 init_insn_group_barriers ();
4759 else if (INSN_P (insn))
4761 insns_since_last_label = 1;
4763 if (group_barrier_needed_p (insn))
4768 fprintf (dump, "Emitting stop before label %d\n",
4769 INSN_UID (last_label));
4770 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4773 init_insn_group_barriers ();
4781 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4782 This function has to emit all necessary group barriers. */
4785 emit_all_insn_group_barriers (dump, insns)
4786 FILE *dump ATTRIBUTE_UNUSED;
4791 init_insn_group_barriers ();
4793 for (insn = insns; insn; insn = NEXT_INSN (insn))
4795 if (GET_CODE (insn) == INSN
4796 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4797 && XINT (PATTERN (insn), 1) == 2)
4798 init_insn_group_barriers ();
4799 else if (INSN_P (insn))
4801 if (group_barrier_needed_p (insn))
4803 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4804 init_insn_group_barriers ();
4805 group_barrier_needed_p (insn);
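/* The barriers emitted above become ";;" stop bits in the assembly
   output. For example (illustrative):

	add r14 = r15, r16 ;;
	add r17 = r14, r18

   The stop bit ends the instruction group between the two adds, so the
   second add legally observes the value written by the first. */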
4811 static int errata_find_address_regs PARAMS ((rtx *, void *));
4812 static void errata_emit_nops PARAMS ((rtx));
4813 static void fixup_errata PARAMS ((void));
4815 /* This structure is used to track some details about the previous insn
4816 groups so we can determine if it may be necessary to insert NOPs to
4817 work around hardware errata. */
4818 static struct group
4819 {
4820 HARD_REG_SET p_reg_set;
4821 HARD_REG_SET gr_reg_conditionally_set;
4822 } last_group[2];
4824 /* Index into the last_group array. */
4825 static int group_idx;
4827 /* Called through for_each_rtx; determines if a hard register that was
4828 conditionally set in the previous group is used as an address register.
4829 It ensures that for_each_rtx returns 1 in that case. */
4831 errata_find_address_regs (xp, data)
4833 void *data ATTRIBUTE_UNUSED;
4836 if (GET_CODE (x) != MEM)
4839 if (GET_CODE (x) == POST_MODIFY)
4841 if (GET_CODE (x) == REG)
4843 struct group *prev_group = last_group + (group_idx ^ 1);
4844 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4852 /* Called for each insn; this function keeps track of the state in
4853 last_group and emits additional NOPs if necessary to work around
4854 an Itanium A/B step erratum. */
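/* The workaround emitted by this function has the shape (illustrative)

	;;
	nop 0
	;;

   i.e. a nop surrounded by stop bits, which forces the conditional
   register write and the dependent memory access into separate
   instruction groups. */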
4856 errata_emit_nops (insn)
4859 struct group *this_group = last_group + group_idx;
4860 struct group *prev_group = last_group + (group_idx ^ 1);
4861 rtx pat = PATTERN (insn);
4862 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4863 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4864 enum attr_type type;
4867 if (GET_CODE (real_pat) == USE
4868 || GET_CODE (real_pat) == CLOBBER
4869 || GET_CODE (real_pat) == ASM_INPUT
4870 || GET_CODE (real_pat) == ADDR_VEC
4871 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4872 || asm_noperands (PATTERN (insn)) >= 0)
4875 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4876 part of it here. */
4877 set = real_pat;
4878 if (GET_CODE (set) == PARALLEL)
4881 set = XVECEXP (real_pat, 0, 0);
4882 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4883 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4884 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4891 if (set && GET_CODE (set) != SET)
4892 set = 0;
4894 type = get_attr_type (insn);
4897 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4898 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4900 if ((type == TYPE_M || type == TYPE_A) && cond && set
4901 && REG_P (SET_DEST (set))
4902 && GET_CODE (SET_SRC (set)) != PLUS
4903 && GET_CODE (SET_SRC (set)) != MINUS
4904 && (GET_CODE (SET_SRC (set)) != ASHIFT
4905 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4906 && (GET_CODE (SET_SRC (set)) != MEM
4907 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4908 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4910 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4911 || ! REG_P (XEXP (cond, 0)))
4912 abort ();
4914 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4915 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4917 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4919 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4920 emit_insn_before (gen_nop (), insn);
4921 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4923 memset (last_group, 0, sizeof last_group);
4927 /* Emit extra nops if they are required to work around hardware errata. */
4929 static void
4930 fixup_errata ()
4934 if (! TARGET_B_STEP)
4935 return;
4938 memset (last_group, 0, sizeof last_group);
4940 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4945 if (ia64_safe_type (insn) == TYPE_S)
4946 {
4947 group_idx ^= 1;
4948 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4949 }
4950 else
4951 errata_emit_nops (insn);
4955 /* Instruction scheduling support. */
4956 /* Describe one bundle. */
4957 struct bundle
4958 {
4960 /* Zero if there's no possibility of a stop in this bundle other than
4961 at the end, otherwise the position of the optional stop bit. */
4962 int possible_stop;
4963 /* The types of the three slots. */
4964 enum attr_type t[3];
4965 /* The pseudo op to be emitted into the assembler output. */
4966 const char *name;
4967 };
4969 #define NR_BUNDLES 10
4971 /* A list of all available bundles. */
4973 static const struct bundle bundle[NR_BUNDLES] =
4974 {
4975 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4976 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4977 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4978 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4979 #if NR_BUNDLES == 10
4980 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4981 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4983 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4984 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4985 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4986 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4987 it matches an L type insn. Otherwise we'll try to generate L type
4988 nops. */
4989 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4990 };
4992 /* Describe a packet of instructions. Packets consist of two bundles that
4993 are visible to the hardware in one scheduling window. */
4995 struct ia64_packet
4996 {
4997 const struct bundle *t1, *t2;
4998 /* Precomputed value of the first split issue in this packet if a cycle
4999 starts at its beginning. */
5000 int first_split;
5001 /* For convenience, the insn types are replicated here so we don't have
5002 to go through T1 and T2 all the time. */
5003 enum attr_type t[6];
5004 };
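/* For example (illustrative), the packet pairing .mii with .mfb has
   T[] = { M, I, I, M, F, B }: two bundles, six slots, seen by the
   hardware as one two-bundle issue window. */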
5006 /* An array containing all possible packets. */
5007 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5008 static struct ia64_packet packets[NR_PACKETS];
5010 /* Map attr_type to a string with the name. */
5012 static const char *type_names[] =
5014 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5017 /* Nonzero if we should insert stop bits into the schedule. */
5018 int ia64_final_schedule = 0;
5020 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5021 static rtx ia64_single_set PARAMS ((rtx));
5022 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5023 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5024 static void maybe_rotate PARAMS ((FILE *));
5025 static void finish_last_head PARAMS ((FILE *, int));
5026 static void rotate_one_bundle PARAMS ((FILE *));
5027 static void rotate_two_bundles PARAMS ((FILE *));
5028 static void nop_cycles_until PARAMS ((int, FILE *));
5029 static void cycle_end_fill_slots PARAMS ((FILE *));
5030 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5031 static int get_split PARAMS ((const struct ia64_packet *, int));
5032 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5033 const struct ia64_packet *, int));
5034 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5035 rtx *, enum attr_type *, int));
5036 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5037 static void dump_current_packet PARAMS ((FILE *));
5038 static void schedule_stop PARAMS ((FILE *));
5039 static rtx gen_nop_type PARAMS ((enum attr_type));
5040 static void ia64_emit_nops PARAMS ((void));
5042 /* Map a bundle number to its pseudo-op. */
5045 get_bundle_name (b)
5046 int b;
5048 return bundle[b].name;
5051 /* Compute the slot which will cause a split issue in packet P if the
5052 current cycle begins at slot BEGIN. */
5054 static int
5055 itanium_split_issue (p, begin)
5056 const struct ia64_packet *p;
5057 int begin;
5059 int type_count[TYPE_S];
5060 int i;
5061 int split = 6;
5063 if (begin < 3)
5065 /* Always split before and after MMF. */
5066 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5067 return 3;
5068 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5069 split = 3;
5070 /* Always split after MBB and BBB. */
5071 if (p->t[1] == TYPE_B)
5072 split = 2;
5073 /* Split after first bundle in MIB BBB combination. */
5074 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5075 split = 3;
5078 memset (type_count, 0, sizeof type_count);
5079 for (i = begin; i < split; i++)
5081 enum attr_type t0 = p->t[i];
5082 /* An MLX bundle reserves the same units as an MFI bundle. */
5083 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5084 : t0 == TYPE_X ? TYPE_I
5085 : t0);
5086 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
5087 if (type_count[t] == max)
5088 return i;
5089 type_count[t]++;
5090 }
5091 return split;
5092 }
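/* Example (illustrative): for an .mii/.mii packet starting at slot 0
   the types are M I I M I I. With two I units available, the third I
   saturates TYPE_COUNT[TYPE_I] and the function returns 4, i.e. a
   split issue occurs before the fifth slot. */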
5094 /* Return the maximum number of instructions a cpu can issue. */
5102 /* Helper function - like single_set, but look inside COND_EXEC. */
5105 ia64_single_set (insn)
5108 rtx x = PATTERN (insn);
5109 if (GET_CODE (x) == COND_EXEC)
5110 x = COND_EXEC_CODE (x);
5111 if (GET_CODE (x) == SET)
5112 return x;
5113 return single_set_2 (insn, x);
5116 /* Adjust the cost of a scheduling dependency. Return the new cost of
5117 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
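/* The special cases below model Itanium bypass latencies: a result
   that crosses between the integer, multimedia (MM), and floating
   point units incurs extra delay, e.g. an IALU or load result feeding
   an MM multiply/shift, or an FMAC result feeding a floating-point
   store. (Summary only; the exact cycle counts are those returned
   below.) */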
5120 ia64_adjust_cost (insn, link, dep_insn, cost)
5121 rtx insn, link, dep_insn;
5124 enum attr_type dep_type;
5125 enum attr_itanium_class dep_class;
5126 enum attr_itanium_class insn_class;
5127 rtx dep_set, set, src, addr;
5129 if (GET_CODE (PATTERN (insn)) == CLOBBER
5130 || GET_CODE (PATTERN (insn)) == USE
5131 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5132 || GET_CODE (PATTERN (dep_insn)) == USE
5133 /* @@@ Not accurate for indirect calls. */
5134 || GET_CODE (insn) == CALL_INSN
5135 || ia64_safe_type (insn) == TYPE_S)
5138 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5139 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5142 dep_type = ia64_safe_type (dep_insn);
5143 dep_class = ia64_safe_itanium_class (dep_insn);
5144 insn_class = ia64_safe_itanium_class (insn);
5146 /* Compares that feed a conditional branch can execute in the same
5147 cycle. */
5148 dep_set = ia64_single_set (dep_insn);
5149 set = ia64_single_set (insn);
5151 if (dep_type != TYPE_F
5152 && dep_set
5153 && GET_CODE (SET_DEST (dep_set)) == REG
5154 && PR_REGNO_P (REGNO (SET_DEST (dep_set)))
5155 && GET_CODE (insn) == JUMP_INSN)
5156 return 0;
5158 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5160 /* ??? Can't find any information in the documentation about whether
5161 a store followed by a load from the same location
5164 splits issue. Assume it doesn't. */
5165 return cost;
5168 src = set ? SET_SRC (set) : 0;
5169 addr = 0;
5170 if (set && GET_CODE (SET_DEST (set)) == MEM)
5171 addr = XEXP (SET_DEST (set), 0);
5172 else if (set && GET_CODE (src) == MEM)
5173 addr = XEXP (src, 0);
5174 else if (set && GET_CODE (src) == ZERO_EXTEND
5175 && GET_CODE (XEXP (src, 0)) == MEM)
5176 addr = XEXP (XEXP (src, 0), 0);
5177 else if (set && GET_CODE (src) == UNSPEC
5178 && XVECLEN (src, 0) > 0
5179 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5180 addr = XEXP (XVECEXP (src, 0, 0), 0);
5181 if (addr && GET_CODE (addr) == POST_MODIFY)
5182 addr = XEXP (addr, 0);
5184 set = ia64_single_set (dep_insn);
5186 if ((dep_class == ITANIUM_CLASS_IALU
5187 || dep_class == ITANIUM_CLASS_ILOG
5188 || dep_class == ITANIUM_CLASS_LD)
5189 && (insn_class == ITANIUM_CLASS_LD
5190 || insn_class == ITANIUM_CLASS_ST))
5192 if (! addr || ! set)
5193 return cost;
5194 /* This isn't completely correct - an IALU that feeds an address has
5195 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5196 otherwise. Unfortunately there's no good way to describe this. */
5197 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5200 if ((dep_class == ITANIUM_CLASS_IALU
5201 || dep_class == ITANIUM_CLASS_ILOG
5202 || dep_class == ITANIUM_CLASS_LD)
5203 && (insn_class == ITANIUM_CLASS_MMMUL
5204 || insn_class == ITANIUM_CLASS_MMSHF
5205 || insn_class == ITANIUM_CLASS_MMSHFI))
5207 if (dep_class == ITANIUM_CLASS_FMAC
5208 && (insn_class == ITANIUM_CLASS_FMISC
5209 || insn_class == ITANIUM_CLASS_FCVTFX
5210 || insn_class == ITANIUM_CLASS_XMPY))
5212 if ((dep_class == ITANIUM_CLASS_FMAC
5213 || dep_class == ITANIUM_CLASS_FMISC
5214 || dep_class == ITANIUM_CLASS_FCVTFX
5215 || dep_class == ITANIUM_CLASS_XMPY)
5216 && insn_class == ITANIUM_CLASS_STF)
5218 if ((dep_class == ITANIUM_CLASS_MMMUL
5219 || dep_class == ITANIUM_CLASS_MMSHF
5220 || dep_class == ITANIUM_CLASS_MMSHFI)
5221 && (insn_class == ITANIUM_CLASS_LD
5222 || insn_class == ITANIUM_CLASS_ST
5223 || insn_class == ITANIUM_CLASS_IALU
5224 || insn_class == ITANIUM_CLASS_ILOG
5225 || insn_class == ITANIUM_CLASS_ISHF))
5231 /* Describe the current state of the Itanium pipeline. */
5232 static struct
5233 {
5234 /* The first slot that is used in the current cycle. */
5235 int first_slot;
5236 /* The next slot to fill. */
5237 int cur;
5238 /* The packet we have selected for the current issue window. */
5239 const struct ia64_packet *packet;
5240 /* The position of the split issue that occurs due to issue width
5241 limitations (6 if there's no split issue). */
5242 int split;
5243 /* Record data about the insns scheduled so far in the same issue
5244 window. The elements up to but not including FIRST_SLOT belong
5245 to the previous cycle, the ones starting with FIRST_SLOT belong
5246 to the current cycle. */
5247 enum attr_type types[6];
5248 rtx insns[6];
5249 int stopbit[6];
5250 /* Nonzero if we decided to schedule a stop bit. */
5251 int last_was_stop;
5252 } sched_data;
5254 /* Temporary arrays; they have enough elements to hold all insns that
5255 can be ready at the same time while scheduling of the current block.
5256 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5257 static rtx *sched_ready;
5258 static enum attr_type *sched_types;
5260 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5261 of packet P. */
5263 static int
5264 insn_matches_slot (p, itype, slot, insn)
5265 const struct ia64_packet *p;
5266 enum attr_type itype;
5267 int slot;
5268 rtx insn;
5270 enum attr_itanium_requires_unit0 u0;
5271 enum attr_type stype = p->t[slot];
5275 u0 = ia64_safe_itanium_requires_unit0 (insn);
5276 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5279 for (i = sched_data.first_slot; i < slot; i++)
5280 if (p->t[i] == stype)
5283 if (GET_CODE (insn) == CALL_INSN)
5285 /* Reject calls in multiway branch packets. We want to limit
5286 the number of multiway branches we generate (since the branch
5287 predictor is limited), and this seems to work fairly well.
5288 (If we didn't do this, we'd have to add another test here to
5289 force calls into the third slot of the bundle.) */
5292 if (p->t[1] == TYPE_B)
5297 if (p->t[4] == TYPE_B)
5305 if (itype == TYPE_A)
5306 return stype == TYPE_M || stype == TYPE_I;
5310 /* Like emit_insn_before, but skip cycle_display insns. This makes the
5311 assembly output a bit prettier. */
5314 ia64_emit_insn_before (insn, before)
5317 rtx prev = PREV_INSN (before);
5318 if (prev && GET_CODE (prev) == INSN
5319 && GET_CODE (PATTERN (prev)) == UNSPEC
5320 && XINT (PATTERN (prev), 1) == 23)
5321 before = prev;
5322 emit_insn_before (insn, before);
5326 /* Generate a nop insn of the given type. Note we never generate L type
5327 nops. */
5335 case TYPE_M:
5336 return gen_nop_m ();
5337 case TYPE_I:
5338 return gen_nop_i ();
5339 case TYPE_B:
5340 return gen_nop_b ();
5341 case TYPE_F:
5342 return gen_nop_f ();
5343 case TYPE_X:
5344 return gen_nop_x ();
5351 /* When rotating a bundle out of the issue window, insert a bundle selector
5352 insn in front of it. DUMP is the scheduling dump file or NULL. START
5353 is either 0 or 3, depending on whether we want to emit a bundle selector
5354 for the first bundle or the second bundle in the current issue window.
5356 The selector insns are emitted this late because the selected packet can
5357 be changed until parts of it get rotated out. */
5360 finish_last_head (dump, start)
5364 const struct ia64_packet *p = sched_data.packet;
5365 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5366 int bundle_type = b - bundle;
5370 if (! ia64_final_schedule)
5371 return;
5373 for (i = start; sched_data.insns[i] == 0; i++)
5376 insn = sched_data.insns[i];
5379 fprintf (dump, "// Emitting template before %d: %s\n",
5380 INSN_UID (insn), b->name);
5382 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5385 /* We can't schedule more insns this cycle. Fix up the scheduling state
5386 and advance FIRST_SLOT and CUR.
5387 We have to distribute the insns that are currently found between
5388 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5389 far, they are stored successively in the fields starting at FIRST_SLOT;
5390 now they must be moved to the correct slots.
5391 DUMP is the current scheduling dump file, or NULL. */
5394 cycle_end_fill_slots (dump)
5397 const struct ia64_packet *packet = sched_data.packet;
5399 enum attr_type tmp_types[6];
5402 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5403 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5405 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5407 enum attr_type t = tmp_types[i];
5408 if (t != ia64_safe_type (tmp_insns[i]))
5410 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5412 if (slot > sched_data.split)
5415 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5417 sched_data.types[slot] = packet->t[slot];
5418 sched_data.insns[slot] = 0;
5419 sched_data.stopbit[slot] = 0;
5422 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5423 actual slot type later. */
5424 sched_data.types[slot] = packet->t[slot];
5425 sched_data.insns[slot] = tmp_insns[i];
5426 sched_data.stopbit[slot] = 0;
5430 /* This isn't right - there's no need to pad out until the forced split;
5431 the CPU will automatically split if an insn isn't ready. */
5433 while (slot < sched_data.split)
5435 sched_data.types[slot] = packet->t[slot];
5436 sched_data.insns[slot] = 0;
5437 sched_data.stopbit[slot] = 0;
5442 sched_data.first_slot = sched_data.cur = slot;
5445 /* Bundle rotations, as described in the Itanium optimization manual.
5446 We can rotate either one or both bundles out of the issue window.
5447 DUMP is the current scheduling dump file, or NULL. */
5450 rotate_one_bundle (dump)
5454 fprintf (dump, "// Rotating one bundle.\n");
5456 finish_last_head (dump, 0);
5457 if (sched_data.cur > 3)
5459 sched_data.cur -= 3;
5460 sched_data.first_slot -= 3;
5461 memmove (sched_data.types,
5462 sched_data.types + 3,
5463 sched_data.cur * sizeof *sched_data.types);
5464 memmove (sched_data.stopbit,
5465 sched_data.stopbit + 3,
5466 sched_data.cur * sizeof *sched_data.stopbit);
5467 memmove (sched_data.insns,
5468 sched_data.insns + 3,
5469 sched_data.cur * sizeof *sched_data.insns);
5474 sched_data.first_slot = 0;
5479 rotate_two_bundles (dump)
5483 fprintf (dump, "// Rotating two bundles.\n");
5485 if (sched_data.cur == 0)
5488 finish_last_head (dump, 0);
5489 if (sched_data.cur > 3)
5490 finish_last_head (dump, 3);
5492 sched_data.first_slot = 0;
5495 /* We're beginning a new block. Initialize data structures as necessary. */
5498 ia64_sched_init (dump, sched_verbose, max_ready)
5499 FILE *dump ATTRIBUTE_UNUSED;
5500 int sched_verbose ATTRIBUTE_UNUSED;
5503 static int initialized = 0;
5511 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5513 const struct bundle *t1 = bundle + b1;
5514 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5516 const struct bundle *t2 = bundle + b2;
5522 for (i = 0; i < NR_PACKETS; i++)
5525 for (j = 0; j < 3; j++)
5526 packets[i].t[j] = packets[i].t1->t[j];
5527 for (j = 0; j < 3; j++)
5528 packets[i].t[j + 3] = packets[i].t2->t[j];
5529 packets[i].first_split = itanium_split_issue (packets + i, 0);
5534 init_insn_group_barriers ();
5536 memset (&sched_data, 0, sizeof sched_data);
5537 sched_types = (enum attr_type *) xmalloc (max_ready
5538 * sizeof (enum attr_type));
5539 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5542 /* See if the packet P can match the insns we have already scheduled. Return
5543 nonzero if so. In *PSLOT, we store the first slot that is available for
5544 more instructions if we choose this packet.
5545 SPLIT holds the last slot we can use; there's a split issue after it, so
5546 scheduling beyond it would cause us to use more than one cycle. */
5549 packet_matches_p (p, split, pslot)
5550 const struct ia64_packet *p;
5554 int filled = sched_data.cur;
5555 int first = sched_data.first_slot;
5558 /* First, check if the first of the two bundles must be a specific one (due
5559 to stop bits). */
5560 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5562 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5565 for (i = 0; i < first; i++)
5566 if (! insn_matches_slot (p, sched_data.types[i], i,
5567 sched_data.insns[i]))
5569 for (i = slot = first; i < filled; i++)
5571 while (slot < split)
5573 if (insn_matches_slot (p, sched_data.types[i], slot,
5574 sched_data.insns[i]))
5588 /* A frontend for itanium_split_issue. For a packet P and a slot
5589 number FIRST that describes the start of the current clock cycle,
5590 return the slot number of the first split issue. This function
5591 uses the cached number found in P if possible. */
5594 get_split (p, first)
5595 const struct ia64_packet *p;
5599 return p->first_split;
5600 return itanium_split_issue (p, first);
5603 /* Given N_READY insns in the array READY, whose types are found in the
5604 corresponding array TYPES, return the insn that is best suited to be
5605 scheduled in slot SLOT of packet P. */
5608 find_best_insn (ready, types, n_ready, p, slot)
5610 enum attr_type *types;
5612 const struct ia64_packet *p;
5617 while (n_ready-- > 0)
5619 rtx insn = ready[n_ready];
5622 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5624 /* If we have equally good insns, one of which has a stricter
5625 slot requirement, prefer the one with the stricter requirement. */
5626 if (best >= 0 && types[n_ready] == TYPE_A)
5628 if (insn_matches_slot (p, types[n_ready], slot, insn))
5631 best_pri = INSN_PRIORITY (ready[best]);
5633 /* If there's no way we could get a stricter requirement, stop
5635 if (types[n_ready] != TYPE_A
5636 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5644 /* Select the best packet to use given the current scheduler state and the
5645 current ready list. */
5646 READY is an array holding N_READY ready insns; TYPES is a corresponding
5647 array that holds their types. Store the best packet in *PPACKET and the
5648 number of insns that can be scheduled in the current cycle in *PBEST. */
5651 find_best_packet (pbest, ppacket, ready, types, n_ready)
5653 const struct ia64_packet **ppacket;
5655 enum attr_type *types;
5658 int first = sched_data.first_slot;
5661 const struct ia64_packet *best_packet = NULL;
5664 for (i = 0; i < NR_PACKETS; i++)
5666 const struct ia64_packet *p = packets + i;
5668 int split = get_split (p, first);
5670 int first_slot, last_slot;
5673 if (! packet_matches_p (p, split, &first_slot))
5676 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5680 for (slot = first_slot; slot < split; slot++)
5684 /* Disallow a degenerate case where the first bundle doesn't
5685 contain anything but NOPs! */
5686 if (first_slot == 0 && win == 0 && slot == 3)
5692 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5695 sched_ready[insn_nr] = 0;
5699 else if (p->t[slot] == TYPE_B)
5702 /* We must disallow MBB/BBB packets if any of their B slots would be
5703 filled with nops. */
5706 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5711 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5716 || (win == best && last_slot < lowest_end))
5719 lowest_end = last_slot;
5724 *ppacket = best_packet;
5727 /* Reorder the ready list so that the insns that can be issued in this cycle
5728 are found in the correct order at the end of the list.
5729 DUMP is the scheduling dump file, or NULL. READY points to the start,
5730 E_READY to the end of the ready list. MAY_FAIL determines what should be
5731 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5732 otherwise we return 0.
5733 Return 1 if any insns can be scheduled in this cycle. */
5736 itanium_reorder (dump, ready, e_ready, may_fail)
5742 const struct ia64_packet *best_packet;
5743 int n_ready = e_ready - ready;
5744 int first = sched_data.first_slot;
5745 int i, best, best_split, filled;
5747 for (i = 0; i < n_ready; i++)
5748 sched_types[i] = ia64_safe_type (ready[i]);
5750 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5761 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5762 best_packet->t1->name,
5763 best_packet->t2 ? best_packet->t2->name : NULL, best);
5766 best_split = itanium_split_issue (best_packet, first);
5767 packet_matches_p (best_packet, best_split, &filled);
5769 for (i = filled; i < best_split; i++)
5773 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5776 rtx insn = ready[insn_nr];
5777 memmove (ready + insn_nr, ready + insn_nr + 1,
5778 (n_ready - insn_nr - 1) * sizeof (rtx));
5779 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5780 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5781 ready[--n_ready] = insn;
5785 sched_data.packet = best_packet;
5786 sched_data.split = best_split;
5790 /* Dump information about the current scheduling state to file DUMP. */
5793 dump_current_packet (dump)
5797 fprintf (dump, "// %d slots filled:", sched_data.cur);
5798 for (i = 0; i < sched_data.first_slot; i++)
5800 rtx insn = sched_data.insns[i];
5801 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5803 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5804 if (sched_data.stopbit[i])
5805 fprintf (dump, " ;;");
5807 fprintf (dump, " :::");
5808 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5810 rtx insn = sched_data.insns[i];
5811 enum attr_type t = ia64_safe_type (insn);
5812 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5814 fprintf (dump, "\n");
5817 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5818 NULL. */
5821 schedule_stop (dump)
5824 const struct ia64_packet *best = sched_data.packet;
5829 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5831 if (sched_data.cur == 0)
5834 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5836 rotate_two_bundles (NULL);
5840 for (i = -1; i < NR_PACKETS; i++)
5842 /* This is a slight hack to give the current packet the first chance.
5843 This is done to avoid e.g. switching from MIB to MBB bundles. */
5844 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5845 int split = get_split (p, sched_data.first_slot);
5846 const struct bundle *compare;
5849 if (! packet_matches_p (p, split, &next))
5852 compare = next > 3 ? p->t2 : p->t1;
5855 if (compare->possible_stop)
5856 stoppos = compare->possible_stop;
5860 if (stoppos < next || stoppos >= best_stop)
5862 if (compare->possible_stop == 0)
5864 stoppos = (next > 3 ? 6 : 3);
5866 if (stoppos < next || stoppos >= best_stop)
5870 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5871 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5874 best_stop = stoppos;
5878 sched_data.packet = best;
5879 cycle_end_fill_slots (dump);
5880 while (sched_data.cur < best_stop)
5882 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5883 sched_data.insns[sched_data.cur] = 0;
5884 sched_data.stopbit[sched_data.cur] = 0;
5887 sched_data.stopbit[sched_data.cur - 1] = 1;
5888 sched_data.first_slot = best_stop;
5891 dump_current_packet (dump);
5894 /* If necessary, perform one or two rotations on the scheduling state.
5895 This should only be called if we are starting a new cycle. */
5901 if (sched_data.cur == 6)
5902 rotate_two_bundles (dump);
5903 else if (sched_data.cur >= 3)
5904 rotate_one_bundle (dump);
5905 sched_data.first_slot = sched_data.cur;
5908 /* The clock cycle when ia64_sched_reorder was last called. */
5909 static int prev_cycle;
5911 /* The first insn scheduled in the previous cycle. This is the saved
5912 value of sched_data.first_slot. */
5913 static int prev_first;
5915 /* The last insn that has been scheduled. At the start of a new cycle
5916 we know that we can emit new insns after it; the main scheduling code
5917 has already emitted a cycle_display insn after it and is using that
5918 as its current last insn. */
5919 static rtx last_issued;
5921 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5922 pad out the delay between MM (shifts, etc.) and integer operations. */
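/* A single filler cycle emitted below looks like (illustrative):

	.mii
	nop.m 0
	nop.i 0
	nop.i 0
	;;

   i.e. an empty .mii bundle followed by a stop bit. */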
5925 nop_cycles_until (clock_var, dump)
5929 int prev_clock = prev_cycle;
5930 int cycles_left = clock_var - prev_clock;
5932 /* Finish the previous cycle; pad it out with NOPs. */
5933 if (sched_data.cur == 3)
5935 rtx t = gen_insn_group_barrier (GEN_INT (3));
5936 last_issued = emit_insn_after (t, last_issued);
5937 maybe_rotate (dump);
5939 else if (sched_data.cur > 0)
5942 int split = itanium_split_issue (sched_data.packet, prev_first);
5944 if (sched_data.cur < 3 && split > 3)
5950 if (split > sched_data.cur)
5953 for (i = sched_data.cur; i < split; i++)
5957 t = gen_nop_type (sched_data.packet->t[i]);
5958 last_issued = emit_insn_after (t, last_issued);
5959 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5960 sched_data.insns[i] = last_issued;
5961 sched_data.stopbit[i] = 0;
5963 sched_data.cur = split;
5966 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5970 for (i = sched_data.cur; i < 6; i++)
5974 t = gen_nop_type (sched_data.packet->t[i]);
5975 last_issued = emit_insn_after (t, last_issued);
5976 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5977 sched_data.insns[i] = last_issued;
5978 sched_data.stopbit[i] = 0;
5985 if (need_stop || sched_data.cur == 6)
5987 rtx t = gen_insn_group_barrier (GEN_INT (3));
5988 last_issued = emit_insn_after (t, last_issued);
5990 maybe_rotate (dump);
5994 while (cycles_left > 0)
5996 rtx t = gen_bundle_selector (GEN_INT (0));
5997 last_issued = emit_insn_after (t, last_issued);
5998 t = gen_nop_type (TYPE_M);
5999 last_issued = emit_insn_after (t, last_issued);
6000 t = gen_nop_type (TYPE_I);
6001 last_issued = emit_insn_after (t, last_issued);
6002 if (cycles_left > 1)
6004 t = gen_insn_group_barrier (GEN_INT (2));
6005 last_issued = emit_insn_after (t, last_issued);
6008 t = gen_nop_type (TYPE_I);
6009 last_issued = emit_insn_after (t, last_issued);
6010 t = gen_insn_group_barrier (GEN_INT (3));
6011 last_issued = emit_insn_after (t, last_issued);
6016 /* We are about to begin issuing insns for this clock cycle.
6017 Override the default sort algorithm to better slot instructions. */
6020 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6021 reorder_type, clock_var)
6022 FILE *dump ATTRIBUTE_UNUSED;
6023 int sched_verbose ATTRIBUTE_UNUSED;
6026 int reorder_type, clock_var;
6029 int n_ready = *pn_ready;
6030 rtx *e_ready = ready + n_ready;
6035 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6036 dump_current_packet (dump);
6039 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6041 for (insnp = ready; insnp < e_ready; insnp++)
6044 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6045 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6046 || t == ITANIUM_CLASS_ILOG
6047 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6050 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6051 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6052 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6054 rtx other = XEXP (link, 0);
6055 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6056 if (t0 == ITANIUM_CLASS_MMSHF
6057 || t0 == ITANIUM_CLASS_MMMUL)
6059 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6068 prev_first = sched_data.first_slot;
6069 prev_cycle = clock_var;
6071 if (reorder_type == 0)
6072 maybe_rotate (sched_verbose ? dump : NULL);
6074 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6076 for (insnp = ready; insnp < e_ready; insnp++)
6077 if (insnp < e_ready)
6080 enum attr_type t = ia64_safe_type (insn);
6081 if (t == TYPE_UNKNOWN)
6083 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6084 || asm_noperands (PATTERN (insn)) >= 0)
6086 rtx lowest = ready[0];
6093 rtx highest = ready[n_ready - 1];
6094 ready[n_ready - 1] = insn;
6096 if (ia64_final_schedule && group_barrier_needed_p (insn))
6098 schedule_stop (sched_verbose ? dump : NULL);
6099 sched_data.last_was_stop = 1;
6100 maybe_rotate (sched_verbose ? dump : NULL);
6107 if (n_asms < n_ready)
6109 /* Some normal insns to process. Skip the asms. */
6113 else if (n_ready > 0)
6115 /* Only asm insns left. */
6116 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6120 if (ia64_final_schedule)
6122 int nr_need_stop = 0;
6124 for (insnp = ready; insnp < e_ready; insnp++)
6125 if (safe_group_barrier_needed_p (*insnp))
6128 /* Schedule a stop bit if
6129 - all insns require a stop bit, or
6130 - we are starting a new cycle and _any_ insns require a stop bit.
6131 The reason for the latter is that if our schedule is accurate, then
6132 the additional stop won't decrease performance at this point (since
6133 there's a split issue at this point anyway), but it gives us more
6134 freedom when scheduling the currently ready insns. */
6135 if ((reorder_type == 0 && nr_need_stop)
6136 || (reorder_type == 1 && n_ready == nr_need_stop))
6138 schedule_stop (sched_verbose ? dump : NULL);
6139 sched_data.last_was_stop = 1;
6140 maybe_rotate (sched_verbose ? dump : NULL);
6141 if (reorder_type == 1)
6148 /* Move down everything that needs a stop bit, preserving relative
6149 order. */
6150 while (insnp-- > ready + deleted)
6151 while (insnp >= ready + deleted)
6154 if (! safe_group_barrier_needed_p (insn))
6155 break;
6156 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6162 if (deleted != nr_need_stop)
6163 abort ();
6167 return itanium_reorder (sched_verbose ? dump : NULL,
6168 ready, e_ready, reorder_type == 1);
6172 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6179 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6180 pn_ready, 0, clock_var);
6183 /* Like ia64_sched_reorder, but called after issuing each insn.
6184 Override the default sort algorithm to better slot instructions. */
6187 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6188 FILE *dump ATTRIBUTE_UNUSED;
6189 int sched_verbose ATTRIBUTE_UNUSED;
6194 if (sched_data.last_was_stop)
6197 /* Detect one special case and try to optimize it.
6198 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6199 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6200 if (sched_data.first_slot == 1
6201 && sched_data.stopbit[0]
6202 && ((sched_data.cur == 4
6203 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6204 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6205 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6206 || (sched_data.cur == 3
6207 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6208 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6209 && sched_data.types[2] != TYPE_A))))
6213 rtx stop = PREV_INSN (sched_data.insns[1]);
6216 sched_data.stopbit[0] = 0;
6217 sched_data.stopbit[2] = 1;
6218 if (GET_CODE (stop) != INSN)
6219 abort ();
6221 pat = PATTERN (stop);
6222 /* Ignore cycle displays. */
6223 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6224 stop = PREV_INSN (stop);
6225 pat = PATTERN (stop);
6226 if (GET_CODE (pat) != UNSPEC_VOLATILE
6227 || XINT (pat, 1) != 2
6228 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6229 abort ();
6230 XVECEXP (pat, 0, 0) = GEN_INT (3);
6232 sched_data.types[5] = sched_data.types[3];
6233 sched_data.types[4] = sched_data.types[2];
6234 sched_data.types[3] = sched_data.types[1];
6235 sched_data.insns[5] = sched_data.insns[3];
6236 sched_data.insns[4] = sched_data.insns[2];
6237 sched_data.insns[3] = sched_data.insns[1];
6238 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6239 sched_data.cur += 2;
6240 sched_data.first_slot = 3;
6241 for (i = 0; i < NR_PACKETS; i++)
6243 const struct ia64_packet *p = packets + i;
6244 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6246 sched_data.packet = p;
6247 break;
6250 rotate_one_bundle (sched_verbose ? dump : NULL);
6253 for (i = 0; i < NR_PACKETS; i++)
6255 const struct ia64_packet *p = packets + i;
6256 int split = get_split (p, sched_data.first_slot);
6259 /* Disallow multiway branches here. */
6260 if (p->t[1] == TYPE_B)
6263 if (packet_matches_p (p, split, &next) && next < best)
6265 best = next;
6266 sched_data.packet = p;
6267 sched_data.split = split;
6276 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6277 ready, pn_ready, 1, clock_var);
6281 /* Did we schedule a stop? If so, finish this cycle. */
6282 if (sched_data.cur == sched_data.first_slot)
6287 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6289 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6291 dump_current_packet (dump);
6295 /* We are about to issue INSN. Return the number of insns left on the
6296 ready queue that can be issued this cycle. */
6299 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6303 int can_issue_more ATTRIBUTE_UNUSED;
6305 enum attr_type t = ia64_safe_type (insn);
6309 if (sched_data.last_was_stop)
6311 int t = sched_data.first_slot;
6314 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6315 init_insn_group_barriers ();
6316 sched_data.last_was_stop = 0;
6319 if (t == TYPE_UNKNOWN)
6322 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6323 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6324 || asm_noperands (PATTERN (insn)) >= 0)
6326 /* This must be some kind of asm. Clear the scheduling state. */
6327 rotate_two_bundles (sched_verbose ? dump : NULL);
6328 if (ia64_final_schedule)
6329 group_barrier_needed_p (insn);
6334 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6335 important state info. Don't delete this test. */
6336 if (ia64_final_schedule
6337 && group_barrier_needed_p (insn))
6338 abort ();
6340 sched_data.stopbit[sched_data.cur] = 0;
6341 sched_data.insns[sched_data.cur] = insn;
6342 sched_data.types[sched_data.cur] = t;
6346 fprintf (dump, "// Scheduling insn %d of type %s\n",
6347 INSN_UID (insn), type_names[t]);
6349 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6351 schedule_stop (sched_verbose ? dump : NULL);
6352 sched_data.last_was_stop = 1;
6358 /* Free data allocated by ia64_sched_init. */
6361 ia64_sched_finish (dump, sched_verbose)
6366 fprintf (dump, "// Finishing schedule.\n");
6367 rotate_two_bundles (NULL);
6373 ia64_cycle_display (clock, last)
6377 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6380 /* Emit pseudo-ops for the assembler to describe predicate relations.
6381 At present this assumes that we only consider predicate pairs to
6382 be mutex, and that the assembler can deduce proper values from
6383 straight-line code. */
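/* For a predicate pair p6/p7 the insn emitted below prints as a
   directive of the form (illustrative)

	.pred.rel "mutex", p6, p7

   telling the assembler that at most one of the two predicates can be
   true, so writes guarded by complementary predicates are not flagged
   as dependency violations. */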
6386 emit_predicate_relation_info ()
6390 for (i = n_basic_blocks - 1; i >= 0; --i)
6392 basic_block bb = BASIC_BLOCK (i);
6394 rtx head = bb->head;
6396 /* We only need such notes at code labels. */
6397 if (GET_CODE (head) != CODE_LABEL)
6398 continue;
6399 if (GET_CODE (NEXT_INSN (head)) == NOTE
6400 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6401 head = NEXT_INSN (head);
6403 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6404 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6406 rtx p = gen_rtx_REG (BImode, r);
6407 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6408 if (head == bb->end)
6409 bb->end = n;
6414 /* Look for conditional calls that do not return, and protect predicate
6415 relations around them. Otherwise the assembler will assume the call
6416 returns, and complain about uses of call-clobbered predicates after
6417 the call. */
6418 for (i = n_basic_blocks - 1; i >= 0; --i)
6420 basic_block bb = BASIC_BLOCK (i);
6421 rtx insn = bb->head;
6425 if (GET_CODE (insn) == CALL_INSN
6426 && GET_CODE (PATTERN (insn)) == COND_EXEC
6427 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6429 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6430 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6431 if (bb->head == insn)
6432 bb->head = b;
6433 if (bb->end == insn)
6434 bb->end = a;
6437 if (insn == bb->end)
6438 break;
6439 insn = NEXT_INSN (insn);
6444 /* Generate a NOP instruction of type T. We will never generate L type
6445 nops. */
6453 case TYPE_M:
6454 return gen_nop_m ();
6455 case TYPE_I:
6456 return gen_nop_i ();
6457 case TYPE_B:
6458 return gen_nop_b ();
6459 case TYPE_F:
6460 return gen_nop_f ();
6461 case TYPE_X:
6462 return gen_nop_x ();
6468 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6469 here than while scheduling. */
6475 const struct bundle *b = 0;
6478 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6482 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6483 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6485 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6486 || GET_CODE (insn) == CODE_LABEL)
6489 while (bundle_pos < 3)
6491 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6494 if (GET_CODE (insn) != CODE_LABEL)
6495 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6496 else
6497 b = 0;
6501 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6503 int t = INTVAL (XVECEXP (pat, 0, 0));
6505 while (bundle_pos < t)
6507 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6513 if (bundle_pos == 3)
6514 b = 0;
6516 if (b && INSN_P (insn))
6518 t = ia64_safe_type (insn);
6519 if (asm_noperands (PATTERN (insn)) >= 0
6520 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6522 while (bundle_pos < 3)
6524 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6530 if (t == TYPE_UNKNOWN)
6531 continue;
6532 while (bundle_pos < 3)
6534 if (t == b->t[bundle_pos]
6535 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6536 || b->t[bundle_pos] == TYPE_I)))
6537 break;
6539 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6548 /* Perform machine dependent operations on the rtl chain INSNS. */
6554 /* If optimizing, we'll have split before scheduling. */
6555 if (optimize == 0)
6556 split_all_insns_noflow ();
6558 /* Make sure the CFG and global_live_at_start are correct
6559 for emit_predicate_relation_info. */
6560 find_basic_blocks (insns, max_reg_num (), NULL);
6561 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6563 if (ia64_flag_schedule_insns2)
6565 ia64_final_schedule = 1;
6566 schedule_ebbs (rtl_dump_file);
6567 ia64_final_schedule = 0;
6569 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6570 place as they were during scheduling. */
6571 emit_insn_group_barriers (rtl_dump_file, insns);
6573 else
6575 emit_all_insn_group_barriers (rtl_dump_file, insns);
6577 /* A call must not be the last instruction in a function, so that the
6578 return address is still within the function, so that unwinding works
6579 properly. Note that IA-64 differs from dwarf2 on this point. */
6580 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6582 rtx insn;
6583 int saw_stop = 0;
6585 insn = get_last_insn ();
6586 if (! INSN_P (insn))
6587 insn = prev_active_insn (insn);
6588 if (GET_CODE (insn) == INSN
6589 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6590 && XINT (PATTERN (insn), 1) == 2)
6591 {
6592 saw_stop = 1;
6593 insn = prev_active_insn (insn);
6594 }
6595 if (GET_CODE (insn) == CALL_INSN)
6597 if (! saw_stop)
6598 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6599 emit_insn (gen_break_f ());
6600 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6605 emit_predicate_relation_info ();
6608 /* Return true if REGNO is used by the epilogue. */
6611 ia64_epilogue_uses (regno)
6614 /* When a function makes a call through a function descriptor, we
6615 will write a (potentially) new value to "gp". After returning
6616 from such a call, we need to make sure the function restores the
6617 original gp-value, even if the function itself does not use the
6618 gp anyway. */
6619 if (regno == R_GR (1)
6621 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6622 return 1;
6624 /* For functions defined with the syscall_linkage attribute, all input
6625 registers are marked as live at all function exits. This prevents the
6626 register allocator from using the input registers, which in turn makes it
6627 possible to restart a system call after an interrupt without having to
6628 save/restore the input registers. This also prevents kernel data from
6629 leaking to application code. */
6631 if (IN_REGNO_P (regno)
6632 && lookup_attribute ("syscall_linkage",
6633 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6634 return 1;
6636 /* Conditional return patterns can't represent the use of `b0' as
6637 the return address, so we force the value live this way. */
6638 if (regno == R_BR (0))
6639 return 1;
6641 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6642 return 1;
6643 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6644 return 1;
6645 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6646 && regno == AR_UNAT_REGNUM)
6647 return 1;
6649 return 0;
6652 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
6655 ia64_valid_type_attribute (type, attributes, identifier, args)
6657 tree attributes ATTRIBUTE_UNUSED;
6661 /* We only support an attribute for function calls. */
6663 if (TREE_CODE (type) != FUNCTION_TYPE
6664 && TREE_CODE (type) != METHOD_TYPE)
6665 return 0;
6667 /* The "syscall_linkage" attribute says the callee is a system call entry
6668 point. This affects ia64_epilogue_uses. */
6670 if (is_attribute_p ("syscall_linkage", identifier))
6671 return args == NULL_TREE;
6673 return 0;
6676 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6678 We add @ to the name if this goes in small data/bss. We can only put
6679 a variable in small data/bss if it is defined in this module or a module
6680 that we are statically linked with. We can't check the second condition,
6681 but TREE_STATIC gives us the first one. */
6683 /* ??? If we had IPA, we could check the second condition. We could support
6684 programmer added section attributes if the variable is not defined in this
6685 module. */
6687 /* ??? See the v850 port for a cleaner way to do this. */
6689 /* ??? We could also support own long data here. Generating movl/add/ld8
6690 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6691 code faster because there is one less load. This also includes incomplete
6692 types which can't go in sdata/sbss. */
6694 /* ??? See select_section. We must put short own readonly variables in
6695 sdata/sbss instead of the more natural rodata, because we can't perform
6696 the DECL_READONLY_SECTION test here. */
6698 extern struct obstack * saveable_obstack;
6700 void
6701 ia64_encode_section_info (decl)
6702 tree decl;
6704 const char *symbol_str;
6706 if (TREE_CODE (decl) == FUNCTION_DECL)
6708 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6709 return;
6712 /* Careful not to prod global register variables. */
6713 if (TREE_CODE (decl) != VAR_DECL
6714 || GET_CODE (DECL_RTL (decl)) != MEM
6715 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6716 return;
6718 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6720 /* We assume that -fpic is used only to create a shared library (dso).
6721 With -fpic, no global data can ever be sdata.
6722 Without -fpic, global common uninitialized data can never be sdata, since
6723 it can unify with a real definition in a dso. */
6724 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6725 to access them. The linker may then be able to do linker relaxation to
6726 optimize references to them. Currently sdata implies use of gprel. */
6727 /* We need the DECL_EXTERNAL check for C++. static class data members get
6728 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6729 statically allocated, but the space is allocated somewhere else. Such
6730 decls can not be own data. */
6731 if (! TARGET_NO_SDATA
6732 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6733 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6734 && ! (TREE_PUBLIC (decl)
6736 || (DECL_COMMON (decl)
6737 && (DECL_INITIAL (decl) == 0
6738 || DECL_INITIAL (decl) == error_mark_node))))
6739 /* Either the variable must be declared without a section attribute,
6740 or the section must be sdata or sbss. */
6741 && (DECL_SECTION_NAME (decl) == 0
6742 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6744 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6747 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6749 /* If the variable has already been defined in the output file, then it
6750 is too late to put it in sdata if it wasn't put there in the first
6751 place. The test is here rather than above, because if it is already
6752 in sdata, then it can stay there. */
6754 if (TREE_ASM_WRITTEN (decl))
6755 ;
6757 /* If this is an incomplete type with size 0, then we can't put it in
6758 sdata because it might be too big when completed. */
6760 && size <= (HOST_WIDE_INT) ia64_section_threshold
6761 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6763 size_t len = strlen (symbol_str);
6764 char *newstr = alloca (len + 1);
6767 *newstr = SDATA_NAME_FLAG_CHAR;
6768 memcpy (newstr + 1, symbol_str, len + 1);
6770 string = ggc_alloc_string (newstr, len + 1);
6771 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6774 /* This decl is marked as being in small data/bss but it shouldn't
6775 be; one likely explanation for this is that the decl has been
6776 moved into a different section from the one it was in when
6777 ENCODE_SECTION_INFO was first called. Remove the '@'.*/
6778 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6780 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6781 = ggc_strdup (symbol_str + 1);
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */
static int block_num;

/* True if we need a copy_state command at the start of the next block.  */
static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */
  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}
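
/* Illustrative output (not in the original source): for an epilogue in the
   middle of a function, the directives emitted here and by
   process_for_unwind_directive bracket the epilogue roughly as

	.label_state 1
	.restore sp
	...epilogue and return...
	.body
	.copy_state 1

   so the unwinder sees the pre-epilogue frame state again in the code
   that follows.  */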
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		process_epilogue ();
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;
	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;
	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;
	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;
	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1): case BR_REG (2): case BR_REG (3):
	case BR_REG (4): case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  /* Bits 0-3 of the fr mask are f2-f5, so f16-f31 start at bit 4;
	     hence the FR_REG (12) offset is intentional, not a typo.  */
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
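
/* Illustrative output (not in the original source): for a frame that
   allocates 32 bytes, saves rp in r35, and spills pr at sp+16, the
   directives emitted by process_set would be roughly

	.fframe 32
	.save rp, r35
	.savesp pr, 16

   matching the assembler's prologue-region conventions.  */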
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  block_num = NOTE_BASIC_BLOCK (insn)->index;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = 0;
	    }
	}

      if (! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	    break;
	  }

	default:
	  abort ();
	}
    }
}
void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
			   tree_cons (NULL_TREE, long_integer_type_node,
				      endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);
}
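
/* Illustrative usage (not in the original source): with these builtins a
   simple spin lock can be written directly in C, e.g.

     static int lock;

     void acquire (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1) != 0)
	 continue;
     }

     void release (void)
     {
       __sync_lock_release_si (&lock);
     }

   The _si forms operate on int, the _di forms on long.  */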
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);

  return ret;
}
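
/* Illustrative example (not in the original source): because of the
   fetchadd special case above, `__sync_fetch_and_add_si (&x, 1)' can be
   emitted as a single

     fetchadd4.acq r8 = [r32], 1

   (register names are arbitrary here), while any operand fetchadd cannot
   encode falls back to the mf/cmpxchg loop described before the
   function.  */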
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}
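
/* Illustrative semantics (not in the original source): since the NAND
   variants are routed here with one_cmpl_optab, the loop computes
   ret = ~old & value.  For example, __sync_nand_and_fetch_si (&x, 0x5)
   with x == 0x3 stores and returns 0x4.  */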
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
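
/* Illustrative usage (not in the original source):

     if (__sync_bool_compare_and_swap_si (&lock, 0, 1))
       ... lock acquired ...

   expands to the ar.ccv/mf/cmpxchg4.acq sequence above followed by a
   comparison of the loaded value against the expected old value.  */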
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}