/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
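/* (These are the three values accepted by -mtls-size: a 14-bit offset
   fits the "adds" form, a 22-bit offset fits "addl", and 64 forces a
   full "movl".)  */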

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_aix_select_section PARAMS ((tree, int,
					     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_aix_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};
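
/* The attribute applies to function types; an illustrative (hypothetical)
   declaration would be:
     extern long my_syscall_stub (long) __attribute__ ((syscall_linkage));
   Its effect is to treat all eight input registers as live on entry; see
   the syscall_linkage check in ia64_compute_frame_size below.  */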

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;
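      /* For example, symbol+0x4000 is accepted here (low 14 bits zero),
	 while symbol+0x4001 is not; the rejected form is materialized as
	 a GOT load plus an explicit add instead, so nearby offsets do not
	 each consume a GOT entry.  */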

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}
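
/* (These four values are the multipliers produced by the shladd
   instruction's shift counts of 1, 2, 3 and 4.)  */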

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}
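
/* (These eight values are exactly the increments the fetchadd4 and
   fetchadd8 instructions can encode in their immediate field.)  */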

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be 0, 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
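
/* (This works because the machine wires r0 to 0, f0 to 0.0 and f1 to 1.0,
   so exactly these constants can be stored to memory without first being
   loaded into a scratch register.)  */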

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
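
/* (Worked example: rop = 0xff0 with rshift = 4 leaves op = 0xff after the
   shift, and exact_log2 (0x100) == 8, an 8-bit field.  A non-contiguous
   mask such as 0xf0f makes op + 1 a non-power-of-two, so exact_log2
   returns -1.)  */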

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    {
      if (! scratch || ! register_operand (scratch, DImode))
	temp = gen_reg_rtx (DImode);
      else
	temp = scratch;
    }
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
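      /* E.g. ofs = 0x2345 gives lo = -0x1cbb and hi = 0x4000; lo is within
	 the signed 14-bit "adds" range [-0x2000, 0x1fff], and hi + lo
	 recovers the original offset exactly.  */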

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;

      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (GET_MODE (dest) != GET_MODE (temp))
    temp = convert_to_mode (GET_MODE (dest), temp, 0);

  emit_move_insn (dest, temp);
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
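
/* Under the IA-64 software conventions the thread pointer ("tp") is
   general register r13, which is what gen_thread_pointer returns.  */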

static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, op0, tga_ret, op1);
	      return NULL_RTX;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (register_operand (op0, Pmode))
		tga_ret = op0;
	      else
		tga_ret = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (tga_ret, op1));
		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
		}
	      else
		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
	      if (tga_ret == op0)
		return NULL_RTX;
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
	      if (op1 == op0)
		return NULL_RTX;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (register_operand (op0, Pmode))
		tmp = op0;
	      else
		tmp = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (tmp, op1));
		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
		}
	      else
		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
	      if (tmp == op0)
		return NULL_RTX;
	      op1 = tmp;
	      break;

	    default:
	      abort ();
	    }
	}
      else if (!TARGET_NO_PIC &&
	       (symbolic_operand (op1, Pmode) ||
		symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    {
	      if (GET_MODE (op1) == DImode)
		emit_insn (gen_movdi_symbolic (op0, op1));
	      else
		emit_insn (gen_movsi_symbolic (op0, op1));
	    }
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
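
/* (The BImode compare result above lives in a one-bit predicate register;
   callers wrap the returned expression in a branch or conditional-move
   pattern as appropriate.)  */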

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx, dest;
  bool indirect_p;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  indirect_p = ! symbolic_operand (addr, VOIDmode);

  if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  if (gp_save)
    emit_move_insn (gp_save, pic_offset_table_rtx);

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (indirect_p)
    {
      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));
    }
  else
    dest = addr;

  if (sibcall_p)
    insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
  else if (! retval)
    insn = gen_call_pic (dest, narg_rtx, b0);
  else
    insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
  emit_call_insn (insn);

  if (gp_save)
    emit_move_insn (pic_offset_table_rtx, gp_save);
}

/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
    {
      SET_HARD_REG_BIT (mask, BR_REG (0));
      spill_size += 8;
      n_spilled += 1;
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;
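  /* (FR registers spill as 16-byte stf.spill slots -- note the
     "spill_size += 16" in the FR loop above -- hence the concern with
     16-byte alignment here.)  */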

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;
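  /* An IA-64 load/store post-increment can encode a 9-bit signed
     immediate; that is the range CONST_OK_FOR_N accepts below.  */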
  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  else
    {
      /* Micro-optimization: if we've created a frame pointer, it's at
	 CFA 0, which may allow the real iterator to be initialized lower,
	 slightly increasing parallelism.  Also, if there are few saves
	 it may eliminate the iterator entirely.  */
      if (disp == 0
	  && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	  && frame_pointer_needed)
	{
	  mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  return mem;
	}
      else
	{
	  rtx seq, insn;

	  if (disp == 0)
	    seq = gen_movdi (spill_fill_data.iter_reg[iter],
			     spill_fill_data.init_reg[iter]);
	  else
	    {
	      start_sequence ();

	      if (! CONST_OK_FOR_I (disp))
		{
		  rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
		  emit_move_insn (tmp, disp_rtx);
		  disp_rtx = tmp;
		}

	      emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				     spill_fill_data.init_reg[iter],
				     disp_rtx));

	      seq = get_insns ();
	      end_sequence ();
	    }

	  /* Careful for being the first insn in a sequence.  */
	  if (spill_fill_data.init_after)
	    insn = emit_insn_after (seq, spill_fill_data.init_after);
	  else
	    {
	      rtx first = get_insns ();
	      if (first)
		insn = emit_insn_before (seq, first);
	      else
		insn = emit_insn (seq);
	    }
	  spill_fill_data.init_after = insn;

	  /* If DISP is 0, we may or may not have a further adjustment
	     afterward.  If we do, then the load/store insn may be modified
	     to be a post-modify.  If we don't, then this copy may be
	     eliminated by copyprop_hardreg_forward, which makes this
	     insn garbage, which runs afoul of the sanity check in
	     propagate_one_insn.  So mark this insn as legal to delete.  */
	  if (disp == 0)
	    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
						  REG_NOTES (insn));
	}
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue (), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ] */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
2221 ia64_expand_prologue ()
2223 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2224 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2227 ia64_compute_frame_size (get_frame_size ());
2228 last_scratch_gr_reg = 15;
2230 /* If there is no epilogue, then we don't need some prologue insns.
2231 We need to avoid emitting the dead prologue insns, because flow
2232 will complain about them. */
2237 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2238 if ((e->flags & EDGE_FAKE) == 0
2239 && (e->flags & EDGE_FALLTHRU) != 0)
2241 epilogue_p = (e != NULL);
2246 /* Set the local, input, and output register names. We need to do this
2247 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2248 half. If we use in/loc/out register names, then we get assembler errors
2249 in crtn.S because there is no alloc insn or regstk directive in there. */
2250 if (! TARGET_REG_NAMES)
2252 int inputs = current_frame_info.n_input_regs;
2253 int locals = current_frame_info.n_local_regs;
2254 int outputs = current_frame_info.n_output_regs;
2256 for (i = 0; i < inputs; i++)
2257 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2258 for (i = 0; i < locals; i++)
2259 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2260 for (i = 0; i < outputs; i++)
2261 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2264 /* Set the frame pointer register name. The regnum is logically loc79,
2265 but of course we'll not have allocated that many locals. Rather than
2266 worrying about renumbering the existing rtxs, we adjust the name. */
2267 /* ??? This code means that we can never use one local register when
2268 there is a frame pointer. loc79 gets wasted in this case, as it is
2269 renamed to a register that will never be used. See also the try_locals
2270 code in find_gr_spill. */
2271 if (current_frame_info.reg_fp)
2273 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2274 reg_names[HARD_FRAME_POINTER_REGNUM]
2275 = reg_names[current_frame_info.reg_fp];
2276 reg_names[current_frame_info.reg_fp] = tmp;
2279 /* Fix up the return address placeholder. */
2280 /* ??? We can fail if __builtin_return_address is used, and we didn't
2281 allocate a register in which to save b0. I can't think of a way to
2282 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2283 then be sure that I got the right one. Further, reload doesn't seem
2284 to care if an eliminable register isn't used, and "eliminates" it anyway. */
2286 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2287 && current_frame_info.reg_save_b0 != 0)
2288 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2290 /* We don't need an alloc instruction if we've used no outputs or locals. */
2291 if (current_frame_info.n_local_regs == 0
2292 && current_frame_info.n_output_regs == 0
2293 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
2295 /* If there is no alloc, but there are input registers used, then we
2296 need a .regstk directive. */
2297 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2298 ar_pfs_save_reg = NULL_RTX;
2302 current_frame_info.need_regstk = 0;
2304 if (current_frame_info.reg_save_ar_pfs)
2305 regno = current_frame_info.reg_save_ar_pfs;
2307 regno = next_scratch_gr_reg ();
2308 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2310 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2311 GEN_INT (current_frame_info.n_input_regs),
2312 GEN_INT (current_frame_info.n_local_regs),
2313 GEN_INT (current_frame_info.n_output_regs),
2314 GEN_INT (current_frame_info.n_rotate_regs)));
2315 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2318 /* Set up frame pointer, stack pointer, and spill iterators. */
2320 n_varargs = cfun->machine->n_varargs;
2321 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2322 stack_pointer_rtx, 0);
2324 if (frame_pointer_needed)
2326 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2327 RTX_FRAME_RELATED_P (insn) = 1;
2330 if (current_frame_info.total_size != 0)
2332 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2335 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2336 offset = frame_size_rtx;
2339 regno = next_scratch_gr_reg ();
2340 offset = gen_rtx_REG (DImode, regno);
2341 emit_move_insn (offset, frame_size_rtx);
2344 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2345 stack_pointer_rtx, offset));
2347 if (! frame_pointer_needed)
2349 RTX_FRAME_RELATED_P (insn) = 1;
2350 if (GET_CODE (offset) != CONST_INT)
2353 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2354 gen_rtx_SET (VOIDmode,
2356 gen_rtx_PLUS (DImode,
2363 /* ??? At this point we must generate a magic insn that appears to
2364 modify the stack pointer, the frame pointer, and all spill
2365 iterators. This would allow the most scheduling freedom. For
2366 now, just hard stop. */
2367 emit_insn (gen_blockage ());
2370 /* Must copy out ar.unat before doing any integer spills. */
2371 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2373 if (current_frame_info.reg_save_ar_unat)
2375 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2378 alt_regno = next_scratch_gr_reg ();
2379 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2380 current_frame_info.gr_used_mask |= 1 << alt_regno;
2383 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2384 insn = emit_move_insn (ar_unat_save_reg, reg);
2385 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2387 /* Even if we're not going to generate an epilogue, we still
2388 need to save the register so that EH works. */
2389 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2390 emit_insn (gen_prologue_use (ar_unat_save_reg));
2393 ar_unat_save_reg = NULL_RTX;
2395 /* Spill all varargs registers. Do this before spilling any GR registers,
2396 since we want the UNAT bits for the GR registers to override the UNAT
2397 bits from varargs, which we don't care about. */
2400 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2402 reg = gen_rtx_REG (DImode, regno);
2403 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2406 /* Locate the bottom of the register save area. */
2407 cfa_off = (current_frame_info.spill_cfa_off
2408 + current_frame_info.spill_size
2409 + current_frame_info.extra_spill_size);
2411 /* Save the predicate register block either in a register or in memory. */
2412 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2414 reg = gen_rtx_REG (DImode, PR_REG (0));
2415 if (current_frame_info.reg_save_pr != 0)
2417 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2418 insn = emit_move_insn (alt_reg, reg);
2420 /* ??? Denote pr spill/fill by a DImode move that modifies all
2421 64 hard registers. */
2422 RTX_FRAME_RELATED_P (insn) = 1;
2424 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2425 gen_rtx_SET (VOIDmode, alt_reg, reg),
2428 /* Even if we're not going to generate an epilogue, we still
2429 need to save the register so that EH works. */
2431 emit_insn (gen_prologue_use (alt_reg));
2435 alt_regno = next_scratch_gr_reg ();
2436 alt_reg = gen_rtx_REG (DImode, alt_regno);
2437 insn = emit_move_insn (alt_reg, reg);
2438 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2443 /* Handle AR regs in numerical order. All of them get special handling. */
2444 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2445 && current_frame_info.reg_save_ar_unat == 0)
2447 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2448 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2452 /* The alloc insn already copied ar.pfs into a general register. The
2453 only thing we have to do now is copy that register to a stack slot
2454 if we'd not allocated a local register for the job. */
2455 if (current_frame_info.reg_save_ar_pfs == 0
2456 && ! current_function_is_leaf)
2458 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2459 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2463 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2465 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2466 if (current_frame_info.reg_save_ar_lc != 0)
2468 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2469 insn = emit_move_insn (alt_reg, reg);
2470 RTX_FRAME_RELATED_P (insn) = 1;
2472 /* Even if we're not going to generate an epilogue, we still
2473 need to save the register so that EH works. */
2475 emit_insn (gen_prologue_use (alt_reg));
2479 alt_regno = next_scratch_gr_reg ();
2480 alt_reg = gen_rtx_REG (DImode, alt_regno);
2481 emit_move_insn (alt_reg, reg);
2482 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2487 /* We should now be at the base of the gr/br/fr spill area. */
2488 if (cfa_off != (current_frame_info.spill_cfa_off
2489 + current_frame_info.spill_size))
2492 /* Spill all general registers. */
2493 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2494 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2496 reg = gen_rtx_REG (DImode, regno);
2497 do_spill (gen_gr_spill, reg, cfa_off, reg);
2501 /* Handle BR0 specially -- it may be getting stored permanently in
2502 some GR register. */
2503 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2505 reg = gen_rtx_REG (DImode, BR_REG (0));
2506 if (current_frame_info.reg_save_b0 != 0)
2508 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2509 insn = emit_move_insn (alt_reg, reg);
2510 RTX_FRAME_RELATED_P (insn) = 1;
2512 /* Even if we're not going to generate an epilogue, we still
2513 need to save the register so that EH works. */
2515 emit_insn (gen_prologue_use (alt_reg));
2519 alt_regno = next_scratch_gr_reg ();
2520 alt_reg = gen_rtx_REG (DImode, alt_regno);
2521 emit_move_insn (alt_reg, reg);
2522 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2527 /* Spill the rest of the BR registers. */
2528 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2529 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2531 alt_regno = next_scratch_gr_reg ();
2532 alt_reg = gen_rtx_REG (DImode, alt_regno);
2533 reg = gen_rtx_REG (DImode, regno);
2534 emit_move_insn (alt_reg, reg);
2535 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2539 /* Align the frame and spill all FR registers. */
2540 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2541 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2545 reg = gen_rtx_REG (TFmode, regno);
2546 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2550 if (cfa_off != current_frame_info.spill_cfa_off)
2553 finish_spill_pointers ();
2556 /* Called after register allocation to add any instructions needed for the
2557 epilogue. Using an epilogue insn is favored over putting all of the
2558 instructions in output_function_epilogue(), since it allows the scheduler
2559 to intermix instructions with the restores of the call-saved registers. In
2560 some cases, it might be necessary to emit a barrier instruction as the last
2561 insn to prevent such scheduling. */
2564 ia64_expand_epilogue (sibcall_p)
2567 rtx insn, reg, alt_reg, ar_unat_save_reg;
2568 int regno, alt_regno, cfa_off;
2570 ia64_compute_frame_size (get_frame_size ());
2572 /* If there is a frame pointer, then we use it instead of the stack
2573 pointer, so that the stack pointer does not need to be valid when
2574 the epilogue starts. See EXIT_IGNORE_STACK. */
2575 if (frame_pointer_needed)
2576 setup_spill_pointers (current_frame_info.n_spilled,
2577 hard_frame_pointer_rtx, 0);
2579 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2580 current_frame_info.total_size);
2582 if (current_frame_info.total_size != 0)
2584 /* ??? At this point we must generate a magic insn that appears to
2585 modify the spill iterators and the frame pointer. This would
2586 allow the most scheduling freedom. For now, just hard stop. */
2587 emit_insn (gen_blockage ());
2590 /* Locate the bottom of the register save area. */
2591 cfa_off = (current_frame_info.spill_cfa_off
2592 + current_frame_info.spill_size
2593 + current_frame_info.extra_spill_size);
2595 /* Restore the predicate registers. */
2596 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2598 if (current_frame_info.reg_save_pr != 0)
2599 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2602 alt_regno = next_scratch_gr_reg ();
2603 alt_reg = gen_rtx_REG (DImode, alt_regno);
2604 do_restore (gen_movdi_x, alt_reg, cfa_off);
2607 reg = gen_rtx_REG (DImode, PR_REG (0));
2608 emit_move_insn (reg, alt_reg);
2611 /* Restore the application registers. */
2613 /* Load the saved unat from the stack, but do not restore it until
2614 after the GRs have been restored. */
2615 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2617 if (current_frame_info.reg_save_ar_unat != 0)
2619 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2622 alt_regno = next_scratch_gr_reg ();
2623 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2624 current_frame_info.gr_used_mask |= 1 << alt_regno;
2625 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2630 ar_unat_save_reg = NULL_RTX;
2632 if (current_frame_info.reg_save_ar_pfs != 0)
2634 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2635 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2636 emit_move_insn (reg, alt_reg);
2638 else if (! current_function_is_leaf)
2640 alt_regno = next_scratch_gr_reg ();
2641 alt_reg = gen_rtx_REG (DImode, alt_regno);
2642 do_restore (gen_movdi_x, alt_reg, cfa_off);
2644 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2645 emit_move_insn (reg, alt_reg);
2648 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2650 if (current_frame_info.reg_save_ar_lc != 0)
2651 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2654 alt_regno = next_scratch_gr_reg ();
2655 alt_reg = gen_rtx_REG (DImode, alt_regno);
2656 do_restore (gen_movdi_x, alt_reg, cfa_off);
2659 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2660 emit_move_insn (reg, alt_reg);
2663 /* We should now be at the base of the gr/br/fr spill area. */
2664 if (cfa_off != (current_frame_info.spill_cfa_off
2665 + current_frame_info.spill_size))
2668 /* Restore all general registers. */
2669 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2670 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2672 reg = gen_rtx_REG (DImode, regno);
2673 do_restore (gen_gr_restore, reg, cfa_off);
2677 /* Restore the branch registers. Handle B0 specially, as it may
2678 have gotten stored in some GR register. */
2679 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2681 if (current_frame_info.reg_save_b0 != 0)
2682 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2685 alt_regno = next_scratch_gr_reg ();
2686 alt_reg = gen_rtx_REG (DImode, alt_regno);
2687 do_restore (gen_movdi_x, alt_reg, cfa_off);
2690 reg = gen_rtx_REG (DImode, BR_REG (0));
2691 emit_move_insn (reg, alt_reg);
2694 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2695 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2697 alt_regno = next_scratch_gr_reg ();
2698 alt_reg = gen_rtx_REG (DImode, alt_regno);
2699 do_restore (gen_movdi_x, alt_reg, cfa_off);
2701 reg = gen_rtx_REG (DImode, regno);
2702 emit_move_insn (reg, alt_reg);
2705 /* Restore floating point registers. */
2706 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2707 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2711 reg = gen_rtx_REG (TFmode, regno);
2712 do_restore (gen_fr_restore_x, reg, cfa_off);
2716 /* Restore ar.unat for real. */
2717 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2719 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2720 emit_move_insn (reg, ar_unat_save_reg);
2723 if (cfa_off != current_frame_info.spill_cfa_off)
2726 finish_spill_pointers ();
2728 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2730 /* ??? At this point we must generate a magic insn that appears to
2731 modify the spill iterators, the stack pointer, and the frame
2732 pointer. This would allow the most scheduling freedom. For now, just hard stop. */
2734 emit_insn (gen_blockage ());
2737 if (cfun->machine->ia64_eh_epilogue_sp)
2738 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2739 else if (frame_pointer_needed)
2741 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2742 RTX_FRAME_RELATED_P (insn) = 1;
2744 else if (current_frame_info.total_size)
2746 rtx offset, frame_size_rtx;
2748 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2749 if (CONST_OK_FOR_I (current_frame_info.total_size))
2750 offset = frame_size_rtx;
2753 regno = next_scratch_gr_reg ();
2754 offset = gen_rtx_REG (DImode, regno);
2755 emit_move_insn (offset, frame_size_rtx);
2758 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2761 RTX_FRAME_RELATED_P (insn) = 1;
2762 if (GET_CODE (offset) != CONST_INT)
2765 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2766 gen_rtx_SET (VOIDmode,
2768 gen_rtx_PLUS (DImode,
2775 if (cfun->machine->ia64_eh_epilogue_bsp)
2776 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2779 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2782 int fp = GR_REG (2);
2783 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2784 first available call-clobbered register. If there was a frame_pointer
2785 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2786 so we have to make sure we're using the string "r2" when emitting
2787 the register name for the assembler. */
2788 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2789 fp = HARD_FRAME_POINTER_REGNUM;
2791 /* We must emit an alloc to force the input registers to become output
2792 registers. Otherwise, if the callee tries to pass its parameters
2793 through to another call without an intervening alloc, then these values are lost. */
2795 /* ??? We don't need to preserve all input registers. We only need to
2796 preserve those input registers used as arguments to the sibling call.
2797 It is unclear how to compute that number here. */
2798 if (current_frame_info.n_input_regs != 0)
2799 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2800 GEN_INT (0), GEN_INT (0),
2801 GEN_INT (current_frame_info.n_input_regs),
2806 /* Return 1 if br.ret can do all the work required to return from a function. */
2810 ia64_direct_return ()
2812 if (reload_completed && ! frame_pointer_needed)
2814 ia64_compute_frame_size (get_frame_size ());
2816 return (current_frame_info.total_size == 0
2817 && current_frame_info.n_spilled == 0
2818 && current_frame_info.reg_save_b0 == 0
2819 && current_frame_info.reg_save_pr == 0
2820 && current_frame_info.reg_save_ar_pfs == 0
2821 && current_frame_info.reg_save_ar_unat == 0
2822 && current_frame_info.reg_save_ar_lc == 0);
2828 ia64_hard_regno_rename_ok (from, to)
2832 /* Don't clobber any of the registers we reserved for the prologue. */
2833 if (to == current_frame_info.reg_fp
2834 || to == current_frame_info.reg_save_b0
2835 || to == current_frame_info.reg_save_pr
2836 || to == current_frame_info.reg_save_ar_pfs
2837 || to == current_frame_info.reg_save_ar_unat
2838 || to == current_frame_info.reg_save_ar_lc)
2841 if (from == current_frame_info.reg_fp
2842 || from == current_frame_info.reg_save_b0
2843 || from == current_frame_info.reg_save_pr
2844 || from == current_frame_info.reg_save_ar_pfs
2845 || from == current_frame_info.reg_save_ar_unat
2846 || from == current_frame_info.reg_save_ar_lc)
2849 /* Don't use output registers outside the register frame. */
2850 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2853 /* Retain even/oddness on predicate register pairs. */
2854 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2855 return (from & 1) == (to & 1);
2857 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2858 if (from == GR_REG (4) && current_function_calls_setjmp)
2864 /* Target hook for assembling integer objects. Handle word-sized
2865 aligned objects and detect the cases when @fptr is needed. */
2868 ia64_assemble_integer (x, size, aligned_p)
2873 if (size == (TARGET_ILP32 ? 4 : 8)
2875 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2876 && GET_CODE (x) == SYMBOL_REF
2877 && SYMBOL_REF_FLAG (x))
2880 fputs ("\tdata4\t@fptr(", asm_out_file);
2882 fputs ("\tdata8\t@fptr(", asm_out_file);
2883 output_addr_const (asm_out_file, x);
2884 fputs (")\n", asm_out_file);
2887 return default_assemble_integer (x, size, aligned_p);
2890 /* Emit the function prologue. */
2893 ia64_output_function_prologue (file, size)
2895 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2897 int mask, grsave, grsave_prev;
2899 if (current_frame_info.need_regstk)
2900 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2901 current_frame_info.n_input_regs,
2902 current_frame_info.n_local_regs,
2903 current_frame_info.n_output_regs,
2904 current_frame_info.n_rotate_regs);
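/* For illustration (an assumption drawn from the format string above):
   a function with 2 inputs, 3 locals, 1 output and no rotating
   registers would get

       .regstk 2, 3, 1, 0

   telling the assembler the shape of the register frame when no alloc
   insn was emitted.  */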
2906 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2909 /* Emit the .prologue directive. */
2912 grsave = grsave_prev = 0;
2913 if (current_frame_info.reg_save_b0 != 0)
2916 grsave = grsave_prev = current_frame_info.reg_save_b0;
2918 if (current_frame_info.reg_save_ar_pfs != 0
2919 && (grsave_prev == 0
2920 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2923 if (grsave_prev == 0)
2924 grsave = current_frame_info.reg_save_ar_pfs;
2925 grsave_prev = current_frame_info.reg_save_ar_pfs;
2927 if (current_frame_info.reg_fp != 0
2928 && (grsave_prev == 0
2929 || current_frame_info.reg_fp == grsave_prev + 1))
2932 if (grsave_prev == 0)
2933 grsave = HARD_FRAME_POINTER_REGNUM;
2934 grsave_prev = current_frame_info.reg_fp;
2936 if (current_frame_info.reg_save_pr != 0
2937 && (grsave_prev == 0
2938 || current_frame_info.reg_save_pr == grsave_prev + 1))
2941 if (grsave_prev == 0)
2942 grsave = current_frame_info.reg_save_pr;
2946 fprintf (file, "\t.prologue %d, %d\n", mask,
2947 ia64_dbx_register_number (grsave));
2949 fputs ("\t.prologue\n", file);
2951 /* Emit a .spill directive, if necessary, to relocate the base of
2952 the register spill area. */
2953 if (current_frame_info.spill_cfa_off != -16)
2954 fprintf (file, "\t.spill %ld\n",
2955 (long) (current_frame_info.spill_cfa_off
2956 + current_frame_info.spill_size));
2959 /* Emit the .body directive at the scheduled end of the prologue. */
2962 ia64_output_function_end_prologue (file)
2965 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2968 fputs ("\t.body\n", file);
2971 /* Emit the function epilogue. */
2974 ia64_output_function_epilogue (file, size)
2975 FILE *file ATTRIBUTE_UNUSED;
2976 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2980 /* Reset from the function's potential modifications. */
2981 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2983 if (current_frame_info.reg_fp)
2985 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2986 reg_names[HARD_FRAME_POINTER_REGNUM]
2987 = reg_names[current_frame_info.reg_fp];
2988 reg_names[current_frame_info.reg_fp] = tmp;
2990 if (! TARGET_REG_NAMES)
2992 for (i = 0; i < current_frame_info.n_input_regs; i++)
2993 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2994 for (i = 0; i < current_frame_info.n_local_regs; i++)
2995 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2996 for (i = 0; i < current_frame_info.n_output_regs; i++)
2997 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3000 current_frame_info.initialized = 0;
3004 ia64_dbx_register_number (regno)
3007 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3008 from its home at loc79 to something inside the register frame. We
3009 must perform the same renumbering here for the debug info. */
3010 if (current_frame_info.reg_fp)
3012 if (regno == HARD_FRAME_POINTER_REGNUM)
3013 regno = current_frame_info.reg_fp;
3014 else if (regno == current_frame_info.reg_fp)
3015 regno = HARD_FRAME_POINTER_REGNUM;
3018 if (IN_REGNO_P (regno))
3019 return 32 + regno - IN_REG (0);
3020 else if (LOC_REGNO_P (regno))
3021 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3022 else if (OUT_REGNO_P (regno))
3023 return (32 + current_frame_info.n_input_regs
3024 + current_frame_info.n_local_regs + regno - OUT_REG (0));
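/* Worked example (illustrative): with 2 input and 3 local registers,
   the stacked registers are numbered densely for the debugger:
   in0 -> 32, in1 -> 33, loc0 -> 34, loc2 -> 36, and out0 -> 37,
   matching the three cases above.  */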
3030 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3031 rtx addr, fnaddr, static_chain;
3033 rtx addr_reg, eight = GEN_INT (8);
3035 /* Load up our iterator. */
3036 addr_reg = gen_reg_rtx (Pmode);
3037 emit_move_insn (addr_reg, addr);
3039 /* The first two words are the fake descriptor:
3040 __ia64_trampoline, ADDR+16. */
3041 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3042 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3043 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3045 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3046 copy_to_reg (plus_constant (addr, 16)));
3047 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3049 /* The third word is the target descriptor. */
3050 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3051 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3053 /* The fourth word is the static chain. */
3054 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
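/* The resulting trampoline layout (derived from the stores above, for
   illustration only):

       addr +  0:  __ia64_trampoline   <- word 0 of the fake descriptor
       addr +  8:  addr + 16           <- word 1 of the fake descriptor
       addr + 16:  fnaddr              <- the real target descriptor
       addr + 24:  static_chain

   A call through the fake descriptor enters __ia64_trampoline, which
   can then reach the real target and the static chain at fixed
   offsets from its own descriptor.  */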
3057 /* Do any needed setup for a variadic function. CUM has not been updated
3058 for the last named argument which has type TYPE and mode MODE.
3060 We generate the actual spill instructions during prologue generation. */
3063 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3064 CUMULATIVE_ARGS cum;
3068 int second_time ATTRIBUTE_UNUSED;
3070 /* Skip the current argument. */
3071 ia64_function_arg_advance (&cum, int_mode, type, 1);
3073 if (cum.words < MAX_ARGUMENT_SLOTS)
3075 int n = MAX_ARGUMENT_SLOTS - cum.words;
3076 *pretend_size = n * UNITS_PER_WORD;
3077 cfun->machine->n_varargs = n;
3081 /* Check whether TYPE is a homogeneous floating point aggregate. If
3082 it is, return the mode of the floating point type that appears
3083 in all leaves. If it is not, return VOIDmode.
3085 An aggregate is a homogeneous floating point aggregate if all
3086 fields/elements in it have the same floating point type (e.g.,
3087 SFmode). 128-bit quad-precision floats are excluded. */
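/* For example (editorial illustration, not from the original sources):

       struct vec3 { float x, y, z; };      -- HFA, element mode SFmode
       struct quat { double a, b; };        -- HFA, element mode DFmode
       struct mix  { float x; double y; };  -- not an HFA (mixed types)

   hfa_element_mode returns SFmode, DFmode, and VOIDmode for these,
   respectively.  */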
3089 static enum machine_mode
3090 hfa_element_mode (type, nested)
3094 enum machine_mode element_mode = VOIDmode;
3095 enum machine_mode mode;
3096 enum tree_code code = TREE_CODE (type);
3097 int know_element_mode = 0;
3102 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3103 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3104 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3105 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3109 /* Fortran complex types are supposed to be HFAs, so we need to handle
3110 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex types here. */
3113 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
3114 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3115 * BITS_PER_UNIT, MODE_FLOAT, 0);
3120 /* ??? Should exclude 128-bit long double here. */
3121 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3122 mode if this is contained within an aggregate. */
3123 if (nested)
3124 return TYPE_MODE (type);
3129 return hfa_element_mode (TREE_TYPE (type), 1);
3133 case QUAL_UNION_TYPE:
3134 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3136 if (TREE_CODE (t) != FIELD_DECL)
3139 mode = hfa_element_mode (TREE_TYPE (t), 1);
3140 if (know_element_mode)
3142 if (mode != element_mode)
3145 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3149 know_element_mode = 1;
3150 element_mode = mode;
3153 return element_mode;
3156 /* If we reach here, we probably have some front-end specific type
3157 that the backend doesn't know about. This can happen via the
3158 aggregate_value_p call in init_function_start. All we can do is
3159 ignore unknown tree types. */
3166 /* Return rtx for register where argument is passed, or zero if it is passed on the stack. */
3169 /* ??? 128-bit quad-precision floats are always passed in general registers. */
3173 ia64_function_arg (cum, mode, type, named, incoming)
3174 CUMULATIVE_ARGS *cum;
3175 enum machine_mode mode;
3180 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3181 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3182 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3185 enum machine_mode hfa_mode = VOIDmode;
3187 /* Integer and float arguments larger than 8 bytes start at the next even
3188 boundary. Aggregates larger than 8 bytes start at the next even boundary
3189 if the aggregate has 16 byte alignment. Net effect is that types with
3190 alignment greater than 8 start at the next even boundary. */
3191 /* ??? The ABI does not specify how to handle aggregates with alignment from
3192 9 to 15 bytes, or greater than 16. We handle them all as if they had
3193 16 byte alignment. Such aggregates can occur only if gcc extensions are used. */
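/* Example (illustrative): a struct with 16-byte alignment being placed
   when cum->words is odd, say 3, is advanced by one slot so that it
   starts in slot 4, the next even slot; an 8-byte-aligned struct
   would start in slot 3 with no adjustment.  */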
3195 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3197 && (cum->words & 1))
3200 /* If all argument slots are used, then it must go on the stack. */
3201 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3204 /* Check for and handle homogeneous FP aggregates. */
3206 hfa_mode = hfa_element_mode (type, 0);
3208 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3209 and unprototyped hfas are passed specially. */
3210 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3214 int fp_regs = cum->fp_regs;
3215 int int_regs = cum->words + offset;
3216 int hfa_size = GET_MODE_SIZE (hfa_mode);
3220 /* If prototyped, pass it in FR regs then GR regs.
3221 If not prototyped, pass it in both FR and GR regs.
3223 If this is an SFmode aggregate, then it is possible to run out of
3224 FR regs while GR regs are still left. In that case, we pass the
3225 remaining part in the GR regs. */
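/* Worked example (illustrative): a named, prototyped HFA of four
   floats arriving with cum->words == 0 and cum->fp_regs == 0 is built
   as a PARALLEL of f8, f9, f10 and f11, one SFmode element per FR
   register; had it been unprototyped, GR slots covering the same
   bytes would be filled in as well.  */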
3227 /* Fill the FP regs. We do this always. We stop if we reach the end
3228 of the argument, the last FP register, or the last argument slot. */
3230 byte_size = ((mode == BLKmode)
3231 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3232 args_byte_size = int_regs * UNITS_PER_WORD;
3234 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3235 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3237 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3238 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3242 args_byte_size += hfa_size;
3246 /* If no prototype, then the whole thing must go in GR regs. */
3247 if (! cum->prototype)
3249 /* If this is an SFmode aggregate, then we might have some left over
3250 that needs to go in GR regs. */
3251 else if (byte_size != offset)
3252 int_regs += offset / UNITS_PER_WORD;
3254 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3256 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3258 enum machine_mode gr_mode = DImode;
3260 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3261 then this goes in a GR reg left adjusted/little endian, right
3262 adjusted/big endian. */
3263 /* ??? Currently this is handled wrong, because 4-byte hunks are
3264 always right adjusted/little endian. */
3267 /* If we have an even 4 byte hunk because the aggregate is a
3268 multiple of 4 bytes in size, then this goes in a GR reg right
3269 adjusted/little endian. */
3270 else if (byte_size - offset == 4)
3272 /* Complex floats need to have float mode. */
3273 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3276 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3277 gen_rtx_REG (gr_mode, (basereg
3280 offset += GET_MODE_SIZE (gr_mode);
3281 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3282 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3285 /* If we ended up using just one location, just return that one loc. */
3287 return XEXP (loc[0], 0);
3289 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3292 /* Integral and aggregates go in general registers. If we have run out of
3293 FR registers, then FP values must also go in general registers. This can
3294 happen when we have a SFmode HFA. */
3295 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3296 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3297 return gen_rtx_REG (mode, basereg + cum->words + offset);
3299 /* If there is a prototype, then FP values go in a FR register when
3300 named, and in a GR register when unnamed. */
3301 else if (cum->prototype)
3304 return gen_rtx_REG (mode, basereg + cum->words + offset);
3306 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3308 /* If there is no prototype, then FP values go in both FR and GR registers. */
3312 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3313 gen_rtx_REG (mode, (FR_ARG_FIRST
3316 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3318 (basereg + cum->words
3322 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3326 /* Return number of words, at the beginning of the argument, that must be
3327 put in registers. 0 if the argument is entirely in registers or entirely in memory. */
3331 ia64_function_arg_partial_nregs (cum, mode, type, named)
3332 CUMULATIVE_ARGS *cum;
3333 enum machine_mode mode;
3335 int named ATTRIBUTE_UNUSED;
3337 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3338 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3342 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3344 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3346 && (cum->words & 1))
3349 /* If all argument slots are used, then it must go on the stack. */
3350 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3353 /* It doesn't matter whether the argument goes in FR or GR regs. If
3354 it fits within the 8 argument slots, then it goes entirely in
3355 registers. If it extends past the last argument slot, then the rest
3356 goes on the stack. */
3358 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3361 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3364 /* Update CUM to point after this argument. This is patterned after
3365 ia64_function_arg. */
3368 ia64_function_arg_advance (cum, mode, type, named)
3369 CUMULATIVE_ARGS *cum;
3370 enum machine_mode mode;
3374 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3375 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3378 enum machine_mode hfa_mode = VOIDmode;
3380 /* If all arg slots are already full, then there is nothing to do. */
3381 if (cum->words >= MAX_ARGUMENT_SLOTS)
3384 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3386 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3388 && (cum->words & 1))
3391 cum->words += words + offset;
3393 /* Check for and handle homogeneous FP aggregates. */
3395 hfa_mode = hfa_element_mode (type, 0);
3397 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3398 and unprototyped hfas are passed specially. */
3399 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3401 int fp_regs = cum->fp_regs;
3402 /* This is the original value of cum->words + offset. */
3403 int int_regs = cum->words - words;
3404 int hfa_size = GET_MODE_SIZE (hfa_mode);
3408 /* If prototyped, pass it in FR regs then GR regs.
3409 If not prototyped, pass it in both FR and GR regs.
3411 If this is an SFmode aggregate, then it is possible to run out of
3412 FR regs while GR regs are still left. In that case, we pass the
3413 remaining part in the GR regs. */
3415 /* Fill the FP regs. We do this always. We stop if we reach the end
3416 of the argument, the last FP register, or the last argument slot. */
3418 byte_size = ((mode == BLKmode)
3419 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3420 args_byte_size = int_regs * UNITS_PER_WORD;
3422 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3423 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3426 args_byte_size += hfa_size;
3430 cum->fp_regs = fp_regs;
3433 /* Integral and aggregates go in general registers. If we have run out of
3434 FR registers, then FP values must also go in general registers. This can
3435 happen when we have a SFmode HFA. */
3436 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3437 cum->int_regs = cum->words;
3439 /* If there is a prototype, then FP values go in a FR register when
3440 named, and in a GR register when unnamed. */
3441 else if (cum->prototype)
3444 cum->int_regs = cum->words;
3446 /* ??? Complex types should not reach here. */
3447 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3449 /* If there is no prototype, then FP values go in both FR and GR registers. */
3453 /* ??? Complex types should not reach here. */
3454 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3455 cum->int_regs = cum->words;
3459 /* Variable sized types are passed by reference. */
3460 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3463 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3464 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3465 enum machine_mode mode ATTRIBUTE_UNUSED;
3467 int named ATTRIBUTE_UNUSED;
3469 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3473 /* Implement va_arg. */
3476 ia64_va_arg (valist, type)
3481 /* Variable sized types are passed by reference. */
3482 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3484 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3485 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3488 /* Arguments with alignment larger than 8 bytes start at the next even boundary. */
3490 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3492 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3493 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3494 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3495 build_int_2 (-2 * UNITS_PER_WORD, -1));
3496 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3497 TREE_SIDE_EFFECTS (t) = 1;
3498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
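/* The two builds above compute, in effect (illustrative):

       valist = (valist + 15) & -16;

   i.e. they round valist up to the next 16-byte (2 * UNITS_PER_WORD)
   boundary before the standard va_arg expansion runs.  */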
3501 return std_expand_builtin_va_arg (valist, type);
3504 /* Return 1 if the function return value is returned in memory. Return 0 if it is in a register. */
3508 ia64_return_in_memory (valtype)
3511 enum machine_mode mode;
3512 enum machine_mode hfa_mode;
3513 HOST_WIDE_INT byte_size;
3515 mode = TYPE_MODE (valtype);
3516 byte_size = GET_MODE_SIZE (mode);
3517 if (mode == BLKmode)
3519 byte_size = int_size_in_bytes (valtype);
3524 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3526 hfa_mode = hfa_element_mode (valtype, 0);
3527 if (hfa_mode != VOIDmode)
3529 int hfa_size = GET_MODE_SIZE (hfa_mode);
3531 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3536 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3542 /* Return rtx for register that holds the function return value. */
3545 ia64_function_value (valtype, func)
3547 tree func ATTRIBUTE_UNUSED;
3549 enum machine_mode mode;
3550 enum machine_mode hfa_mode;
3552 mode = TYPE_MODE (valtype);
3553 hfa_mode = hfa_element_mode (valtype, 0);
3555 if (hfa_mode != VOIDmode)
3563 hfa_size = GET_MODE_SIZE (hfa_mode);
3564 byte_size = ((mode == BLKmode)
3565 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3567 for (i = 0; offset < byte_size; i++)
3569 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3570 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3576 return XEXP (loc[0], 0);
3578 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3580 else if (FLOAT_TYPE_P (valtype) &&
3581 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3582 return gen_rtx_REG (mode, FR_ARG_FIRST);
3584 return gen_rtx_REG (mode, GR_RET_FIRST);
3587 /* Print a memory address as an operand to reference that memory location. */
3589 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3590 also call this from ia64_print_operand for memory addresses. */
3593 ia64_print_operand_address (stream, address)
3594 FILE * stream ATTRIBUTE_UNUSED;
3595 rtx address ATTRIBUTE_UNUSED;
3599 /* Print an operand to an assembler instruction.
3600 C Swap and print a comparison operator.
3601 D Print an FP comparison operator.
3602 E Print 32 - constant, for SImode shifts as extract.
3603 e Print 64 - constant, for DImode rotates.
3604 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3605 a floating point register emitted normally.
3606 I Invert a predicate register by adding 1.
3607 J Select the proper predicate register for a condition.
3608 j Select the inverse predicate register for a condition.
3609 O Append .acq for volatile load.
3610 P Postincrement of a MEM.
3611 Q Append .rel for volatile store.
3612 S Shift amount for shladd instruction.
3613 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3614 for Intel assembler.
3615 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3616 for Intel assembler.
3617 r Print register name, or constant 0 as r0. HP compatibility for Linux kernel. */
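/* For instance (an editorial example, not from the machine
   description): with operand 2 a CONST_INT of 4, a template such as
   "shladd %0 = %1, %S2, %3" prints the 'S' operand as
   exact_log2 (4) == 2, yielding "shladd r14 = r15, 2, r16".  */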
3620 ia64_print_operand (file, x, code)
3630 /* Handled below. */
3635 enum rtx_code c = swap_condition (GET_CODE (x));
3636 fputs (GET_RTX_NAME (c), file);
3641 switch (GET_CODE (x))
3653 str = GET_RTX_NAME (GET_CODE (x));
3660 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3664 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3668 if (x == CONST0_RTX (GET_MODE (x)))
3669 str = reg_names [FR_REG (0)];
3670 else if (x == CONST1_RTX (GET_MODE (x)))
3671 str = reg_names [FR_REG (1)];
3672 else if (GET_CODE (x) == REG)
3673 str = reg_names [REGNO (x)];
3680 fputs (reg_names [REGNO (x) + 1], file);
3686 unsigned int regno = REGNO (XEXP (x, 0));
3687 if (GET_CODE (x) == EQ)
3691 fputs (reg_names [regno], file);
3696 if (MEM_VOLATILE_P (x))
3697 fputs(".acq", file);
3702 HOST_WIDE_INT value;
3704 switch (GET_CODE (XEXP (x, 0)))
3710 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3711 if (GET_CODE (x) == CONST_INT)
3713 else if (GET_CODE (x) == REG)
3715 fprintf (file, ", %s", reg_names[REGNO (x)]);
3723 value = GET_MODE_SIZE (GET_MODE (x));
3727 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3733 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3738 if (MEM_VOLATILE_P (x))
3739 fputs(".rel", file);
3743 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3747 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3749 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3755 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3757 const char *prefix = "0x";
3758 if (INTVAL (x) & 0x80000000)
3760 fprintf (file, "0xffffffff");
3763 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3769 /* If this operand is the constant zero, write it as register zero.
3770 Any register, zero, or CONST_INT value is OK here. */
3771 if (GET_CODE (x) == REG)
3772 fputs (reg_names[REGNO (x)], file);
3773 else if (x == CONST0_RTX (GET_MODE (x)))
3775 else if (GET_CODE (x) == CONST_INT)
3776 output_addr_const (file, x);
3778 output_operand_lossage ("invalid %%r value");
3785 /* For conditional branches, returns or calls, substitute
3786 sptk, dptk, dpnt, or spnt for %s. */
3787 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3790 int pred_val = INTVAL (XEXP (x, 0));
3792 /* Guess top and bottom 10% statically predicted. */
3793 if (pred_val < REG_BR_PROB_BASE / 50)
3795 else if (pred_val < REG_BR_PROB_BASE / 2)
3797 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3802 else if (GET_CODE (current_output_insn) == CALL_INSN)
3807 fputs (which, file);
3812 x = current_insn_predicate;
3815 unsigned int regno = REGNO (XEXP (x, 0));
3816 if (GET_CODE (x) == EQ)
3818 fprintf (file, "(%s) ", reg_names [regno]);
3823 output_operand_lossage ("ia64_print_operand: unknown code");
3827 switch (GET_CODE (x))
3829 /* This happens for the spill/restore instructions. */
3834 /* ... fall through ... */
3837 fputs (reg_names [REGNO (x)], file);
3842 rtx addr = XEXP (x, 0);
3843 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3844 addr = XEXP (addr, 0);
3845 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3850 output_addr_const (file, x);
3857 /* Calculate the cost of moving data from a register in class FROM to
3858 one in class TO, using MODE. */
3861 ia64_register_move_cost (mode, from, to)
3862 enum machine_mode mode;
3863 enum reg_class from, to;
3865 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3866 if (to == ADDL_REGS)
3868 if (from == ADDL_REGS)
3871 /* All costs are symmetric, so reduce cases by putting the
3872 lower-numbered class as the destination. */
3875 enum reg_class tmp = to;
3876 to = from, from = tmp;
3879 /* Moving from FR<->GR in TFmode must be more expensive than 2,
3880 so that we get secondary memory reloads. Between FR_REGS,
3881 we have to make this at least as expensive as MEMORY_MOVE_COST
3882 to avoid spectacularly poor register class preferencing. */
3885 if (to != GR_REGS || from != GR_REGS)
3886 return MEMORY_MOVE_COST (mode, to, 0);
3894 /* Moving between PR registers takes two insns. */
3895 if (from == PR_REGS)
3897 /* Moving between PR and anything but GR is impossible. */
3898 if (from != GR_REGS)
3899 return MEMORY_MOVE_COST (mode, to, 0);
3903 /* Moving between BR and anything but GR is impossible. */
3904 if (from != GR_REGS && from != GR_AND_BR_REGS)
3905 return MEMORY_MOVE_COST (mode, to, 0);
3910 /* Moving between AR and anything but GR is impossible. */
3911 if (from != GR_REGS)
3912 return MEMORY_MOVE_COST (mode, to, 0);
3917 case GR_AND_FR_REGS:
3918 case GR_AND_BR_REGS:
3929 /* This function returns the register class required for a secondary
3930 register when copying between one of the registers in CLASS and X,
3931 using MODE. A return value of NO_REGS means that no secondary register is required. */
3935 ia64_secondary_reload_class (class, mode, x)
3936 enum reg_class class;
3937 enum machine_mode mode ATTRIBUTE_UNUSED;
3942 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3943 regno = true_regnum (x);
3950 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3951 interaction. We end up with two pseudos with overlapping lifetimes
3952 both of which are equiv to the same constant, and both which need
3953 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3954 changes depending on the path length, which means the qty_first_reg
3955 check in make_regs_eqv can give different answers at different times.
3956 At some point I'll probably need a reload_indi pattern to handle this.
3959 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3960 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3961 non-general registers for good measure. */
3962 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
3965 /* This is needed if a pseudo used as a call_operand gets spilled to a stack slot. */
3967 if (GET_CODE (x) == MEM)
3972 /* Need to go through general registers to get to other class regs. */
3973 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
3976 /* This can happen when a paradoxical subreg is an operand to the muldi3 pattern. */
3978 /* ??? This shouldn't be necessary after instruction scheduling is
3979 enabled, because paradoxical subregs are not accepted by
3980 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3981 stop the paradoxical subreg stupidity in the *_operand functions in recog.c. */
3983 if (GET_CODE (x) == MEM
3984 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3985 || GET_MODE (x) == QImode))
3988 /* This can happen because of the ior/and/etc patterns that accept FP
3989 registers as operands. If the third operand is a constant, then it
3990 needs to be reloaded into a FP register. */
3991 if (GET_CODE (x) == CONST_INT)
3994 /* This can happen because of register elimination in a muldi3 insn.
3995 E.g. `26107 * (unsigned long)&u'. */
3996 if (GET_CODE (x) == PLUS)
4001 /* ??? This happens if we cse/gcse a BImode value across a call,
4002 and the function has a nonlocal goto. This is because global
4003 does not allocate call crossing pseudos to hard registers when
4004 current_function_has_nonlocal_goto is true. This is relatively
4005 common for C++ programs that use exceptions. To reproduce,
4006 return NO_REGS and compile libstdc++. */
4007 if (GET_CODE (x) == MEM)
4010 /* This can happen when we take a BImode subreg of a DImode value,
4011 and that DImode value winds up in some non-GR register. */
4012 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4017 /* Since we have no offsettable memory addresses, we need a temporary
4018 to hold the address of the second word. */
4031 /* Emit text to declare externally defined variables and functions, because
4032 the Intel assembler does not support undefined externals. */
4035 ia64_asm_output_external (file, decl, name)
4040 int save_referenced;
4042 /* GNU as does not need anything here. */
4046 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4047 the linker when we do this, so we need to be careful not to do this for
4048 builtin functions which have no library equivalent. Unfortunately, we
4049 can't tell here whether or not a function will actually be called by
4050 expand_expr, so we pull in library functions even if we may not need them. */
4052 if (! strcmp (name, "__builtin_next_arg")
4053 || ! strcmp (name, "alloca")
4054 || ! strcmp (name, "__builtin_constant_p")
4055 || ! strcmp (name, "__builtin_args_info"))
4058 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */
4060 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4061 if (TREE_CODE (decl) == FUNCTION_DECL)
4062 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4063 ASM_GLOBALIZE_LABEL (file, name);
4064 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4067 /* Parse the -mfixed-range= option string. */
4070 fix_range (const_str)
4071 const char *const_str;
4074 char *str, *dash, *comma;
4076 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4077 REG2 are either register names or register numbers. The effect
4078 of this option is to mark the registers in the range from REG1 to
4079 REG2 as ``fixed'' so they won't be used by the compiler. This is
4080 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4082 i = strlen (const_str);
4083 str = (char *) alloca (i + 1);
4084 memcpy (str, const_str, i + 1);
4088 dash = strchr (str, '-');
4091 warning ("value of -mfixed-range must have form REG1-REG2");
4096 comma = strchr (dash + 1, ',');
4100 first = decode_reg_name (str);
4103 warning ("unknown register name: %s", str);
4107 last = decode_reg_name (dash + 1);
4110 warning ("unknown register name: %s", dash + 1);
4118 warning ("%s-%s is an empty range", str, dash + 1);
4122 for (i = first; i <= last; ++i)
4123 fixed_regs[i] = call_used_regs[i] = 1;
4133 static struct machine_function *
4134 ia64_init_machine_status ()
4136 return ggc_alloc_cleared (sizeof (struct machine_function));
4139 /* Handle TARGET_OPTIONS switches. */
4142 ia64_override_options ()
4144 if (TARGET_AUTO_PIC)
4145 target_flags |= MASK_CONST_GP;
4147 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
4149 warning ("cannot optimize division for both latency and throughput");
4150 target_flags &= ~MASK_INLINE_DIV_THR;
4153 if (ia64_fixed_range_string)
4154 fix_range (ia64_fixed_range_string);
4156 if (ia64_tls_size_string)
4159 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4160 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4161 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4163 ia64_tls_size = tmp;
4166 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4167 flag_schedule_insns_after_reload = 0;
4169 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4171 init_machine_status = ia64_init_machine_status;
4174 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4175 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4176 static enum attr_type ia64_safe_type PARAMS((rtx));
4178 static enum attr_itanium_requires_unit0
4179 ia64_safe_itanium_requires_unit0 (insn)
4182 if (recog_memoized (insn) >= 0)
4183 return get_attr_itanium_requires_unit0 (insn);
4185 return ITANIUM_REQUIRES_UNIT0_NO;
4188 static enum attr_itanium_class
4189 ia64_safe_itanium_class (insn)
4192 if (recog_memoized (insn) >= 0)
4193 return get_attr_itanium_class (insn);
4195 return ITANIUM_CLASS_UNKNOWN;
4198 static enum attr_type
4199 ia64_safe_type (insn)
4202 if (recog_memoized (insn) >= 0)
4203 return get_attr_type (insn);
4205 return TYPE_UNKNOWN;
4208 /* The following collection of routines emit instruction group stop bits as
4209 necessary to avoid dependencies. */
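/* Background sketch (editorial): within one ia64 instruction group,
   two insns may not write the same register, nor may one read a
   register the other writes, apart from the exceptions handled below.
   The assembler marks a group boundary with a stop bit, e.g.

       add r8 = r2, r3 ;;   // stop bit: r8 is live into the next group
       add r9 = r8, r4

   These routines decide where such ";;" barriers must be inserted.  */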
4211 /* Need to track some additional registers as far as serialization is
4212 concerned so we can properly handle br.call and br.ret. We could
4213 make these registers visible to gcc, but since these registers are
4214 never explicitly used in gcc generated code, it seems wasteful to
4215 do so (plus it would make the call and return patterns needlessly complex). */
4217 #define REG_GP (GR_REG (1))
4218 #define REG_RP (BR_REG (0))
4219 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4220 /* This is used for volatile asms which may require a stop bit immediately
4221 before and after them. */
4222 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4223 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4224 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4226 /* For each register, we keep track of how it has been written in the
4227 current instruction group.
4229 If a register is written unconditionally (no qualifying predicate),
4230 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4232 If a register is written if its qualifying predicate P is true, we
4233 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4234 may be written again by the complement of P (P^1) and when this happens,
4235 WRITE_COUNT gets set to 2.
4237 The result of this is that whenever an insn attempts to write a register
4238 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4240 If a predicate register is written by a floating-point insn, we set
4241 WRITTEN_BY_FP to true.
4243 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4244 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
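/* Worked example (illustrative): after "(p6) mov r8 = r2",
   rws_sum[R8] has write_count == 1 and first_pred == p6. A later
   "(p7) mov r8 = r3" in the same group, where p7 is p6's complement
   (p6 ^ 1), bumps write_count to 2 without a barrier; any further
   write to r8 in this group then forces an insn group barrier.  */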
4246 struct reg_write_state
4248 unsigned int write_count : 2;
4249 unsigned int first_pred : 16;
4250 unsigned int written_by_fp : 1;
4251 unsigned int written_by_and : 1;
4252 unsigned int written_by_or : 1;
4255 /* Cumulative info for the current instruction group. */
4256 struct reg_write_state rws_sum[NUM_REGS];
4257 /* Info for the current instruction. This gets copied to rws_sum after a
4258 stop bit is emitted. */
4259 struct reg_write_state rws_insn[NUM_REGS];
4261 /* Indicates whether this is the first instruction after a stop bit,
4262 in which case we don't need another stop bit. Without this, we hit
4263 the abort in ia64_variable_issue when scheduling an alloc. */
4264 static int first_instruction;
4266 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4267 RTL for one instruction. */
4270 unsigned int is_write : 1; /* Is register being written? */
4271 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4272 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4273 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4274 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4275 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4278 static void rws_update PARAMS ((struct reg_write_state *, int,
4279 struct reg_flags, int));
4280 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4281 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4282 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4283 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4284 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4285 static void init_insn_group_barriers PARAMS ((void));
4286 static int group_barrier_needed_p PARAMS ((rtx));
4287 static int safe_group_barrier_needed_p PARAMS ((rtx));
4289 /* Update *RWS for REGNO, which is being written by the current instruction,
4290 with predicate PRED, and associated register flags in FLAGS. */
4293 rws_update (rws, regno, flags, pred)
4294 struct reg_write_state *rws;
4296 struct reg_flags flags;
4300 rws[regno].write_count++;
4302 rws[regno].write_count = 2;
4303 rws[regno].written_by_fp |= flags.is_fp;
4304 /* ??? Not tracking and/or across differing predicates. */
4305 rws[regno].written_by_and = flags.is_and;
4306 rws[regno].written_by_or = flags.is_or;
4307 rws[regno].first_pred = pred;
4310 /* Handle an access to register REGNO of type FLAGS using predicate register
4311 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4312 a dependency with an earlier instruction in the same group. */
4315 rws_access_regno (regno, flags, pred)
4317 struct reg_flags flags;
4320 int need_barrier = 0;
4322 if (regno >= NUM_REGS)
4325 if (! PR_REGNO_P (regno))
4326 flags.is_and = flags.is_or = 0;
4332 /* One insn writes same reg multiple times? */
4333 if (rws_insn[regno].write_count > 0)
4336 /* Update info for current instruction. */
4337 rws_update (rws_insn, regno, flags, pred);
4338 write_count = rws_sum[regno].write_count;
4340 switch (write_count)
4343 /* The register has not been written yet. */
4344 rws_update (rws_sum, regno, flags, pred);
4348 /* The register has been written via a predicate. If this is
4349 not a complementary predicate, then we need a barrier. */
4350 /* ??? This assumes that P and P+1 are always complementary
4351 predicates for P even. */
4352 if (flags.is_and && rws_sum[regno].written_by_and)
4354 else if (flags.is_or && rws_sum[regno].written_by_or)
4356 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4358 rws_update (rws_sum, regno, flags, pred);
4362 /* The register has been unconditionally written already. We
4363 need a barrier. */
4364 if (flags.is_and && rws_sum[regno].written_by_and)
4366 else if (flags.is_or && rws_sum[regno].written_by_or)
4370 rws_sum[regno].written_by_and = flags.is_and;
4371 rws_sum[regno].written_by_or = flags.is_or;
4380 if (flags.is_branch)
4382 /* Branches have several RAW exceptions that let us avoid
4383 barriers. */
4385 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4386 /* RAW dependencies on branch regs are permissible as long
4387 as the writer is a non-branch instruction. Since we
4388 never generate code that uses a branch register written
4389 by a branch instruction, handling this case is
4390 easy. */
4393 if (REGNO_REG_CLASS (regno) == PR_REGS
4394 && ! rws_sum[regno].written_by_fp)
4395 /* The predicates of a branch are available within the
4396 same insn group as long as the predicate was written by
4397 something other than a floating-point instruction. */
4401 if (flags.is_and && rws_sum[regno].written_by_and)
4403 if (flags.is_or && rws_sum[regno].written_by_or)
4406 switch (rws_sum[regno].write_count)
4409 /* The register has not been written yet. */
4413 /* The register has been written via a predicate. If this is
4414 not a complementary predicate, then we need a barrier. */
4415 /* ??? This assumes that P and P+1 are always complementary
4416 predicates for P even. */
4417 if ((rws_sum[regno].first_pred ^ 1) != pred)
4422 /* The register has been unconditionally written already. We
4423 need a barrier. */
4432 return need_barrier;
4436 rws_access_reg (reg, flags, pred)
4438 struct reg_flags flags;
4441 int regno = REGNO (reg);
4442 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4445 return rws_access_regno (regno, flags, pred);
4448 int need_barrier = 0;
4450 need_barrier |= rws_access_regno (regno + n, flags, pred);
4451 return need_barrier;
4455 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4456 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4459 update_set_flags (x, pflags, ppred, pcond)
4461 struct reg_flags *pflags;
4465 rtx src = SET_SRC (x);
4469 switch (GET_CODE (src))
4475 if (SET_DEST (x) == pc_rtx)
4476 /* X is a conditional branch. */
4477 return;
4478 else
4479 {
4480 int is_complemented = 0;
4482 /* X is a conditional move. */
4483 rtx cond = XEXP (src, 0);
4484 if (GET_CODE (cond) == EQ)
4485 is_complemented = 1;
4486 cond = XEXP (cond, 0);
4487 if (GET_CODE (cond) != REG
4488 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4489 abort ();
4491 if (XEXP (src, 1) == SET_DEST (x)
4492 || XEXP (src, 2) == SET_DEST (x))
4494 /* X is a conditional move that conditionally writes the
4497 /* We need another complement in this case. */
4498 if (XEXP (src, 1) == SET_DEST (x))
4499 is_complemented = ! is_complemented;
4501 *ppred = REGNO (cond);
4502 if (is_complemented)
4503 *ppred ^= 1;
4506 /* ??? If this is a conditional write to the dest, then this
4507 instruction does not actually read one source. This probably
4508 doesn't matter, because that source is also the dest. */
4509 /* ??? Multiple writes to predicate registers are allowed
4510 if they are all AND type compares, or if they are all OR
4511 type compares. We do not generate such instructions
4512 currently. */
4514 /* ... fall through ... */
4517 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4518 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4519 /* Set pflags->is_fp to 1 so that we know we're dealing
4520 with a floating point comparison when processing the
4521 destination of the SET. */
4524 /* Discover if this is a parallel comparison. We only handle
4525 and.orcm and or.andcm at present, since we must retain a
4526 strict inverse on the predicate pair. */
4527 else if (GET_CODE (src) == AND)
4529 else if (GET_CODE (src) == IOR)
4536 /* Subroutine of rtx_needs_barrier; this function determines whether the
4537 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4538 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4539 for this insn. */
4542 set_src_needs_barrier (x, flags, pred, cond)
4544 struct reg_flags flags;
4548 int need_barrier = 0;
4550 rtx src = SET_SRC (x);
4552 if (GET_CODE (src) == CALL)
4553 /* We don't need to worry about the result registers that
4554 get written by subroutine call. */
4555 return rtx_needs_barrier (src, flags, pred);
4556 else if (SET_DEST (x) == pc_rtx)
4558 /* X is a conditional branch. */
4559 /* ??? This seems redundant, as the caller sets this bit for
4560 all JUMP_INSNs. */
4561 flags.is_branch = 1;
4562 return rtx_needs_barrier (src, flags, pred);
4565 need_barrier = rtx_needs_barrier (src, flags, pred);
4567 /* This instruction unconditionally uses a predicate register. */
4568 if (cond)
4569 need_barrier |= rws_access_reg (cond, flags, 0);
4571 dst = SET_DEST (x);
4572 if (GET_CODE (dst) == ZERO_EXTRACT)
4573 {
4574 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4575 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4576 dst = XEXP (dst, 0);
4577 }
4578 return need_barrier;
4581 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4582 Return 1 if this access creates a dependency with an earlier instruction
4583 in the same group. */
4586 rtx_needs_barrier (x, flags, pred)
4588 struct reg_flags flags;
4592 int is_complemented = 0;
4593 int need_barrier = 0;
4594 const char *format_ptr;
4595 struct reg_flags new_flags;
4603 switch (GET_CODE (x))
4606 update_set_flags (x, &new_flags, &pred, &cond);
4607 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4608 if (GET_CODE (SET_SRC (x)) != CALL)
4609 {
4610 new_flags.is_write = 1;
4611 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4612 }
4616 new_flags.is_write = 0;
4617 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4619 /* Avoid multiple register writes, in case this is a pattern with
4620 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4621 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4622 {
4623 new_flags.is_write = 1;
4624 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4625 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4626 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4627 }
4631 /* X is a predicated instruction. */
4633 cond = COND_EXEC_TEST (x);
4636 need_barrier = rtx_needs_barrier (cond, flags, 0);
4638 if (GET_CODE (cond) == EQ)
4639 is_complemented = 1;
4640 cond = XEXP (cond, 0);
4641 if (GET_CODE (cond) != REG
4642 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4643 abort ();
4644 pred = REGNO (cond);
4645 if (is_complemented)
4646 pred ^= 1;
4648 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4649 return need_barrier;
4653 /* Clobber & use are for earlier compiler-phases only. */
4658 /* We always emit stop bits for traditional asms. We emit stop bits
4659 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4660 if (GET_CODE (x) != ASM_OPERANDS
4661 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4662 {
4663 /* Avoid writing the register multiple times if we have multiple
4664 asm outputs. This avoids an abort in rws_access_reg. */
4665 if (! rws_insn[REG_VOLATILE].write_count)
4666 {
4667 new_flags.is_write = 1;
4668 rws_access_regno (REG_VOLATILE, new_flags, pred);
4669 }
4670 return 1;
4671 }
4673 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4674 We can not just fall through here since then we would be confused
4675 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4676 traditional asms unlike their normal usage. */
4678 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4679 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4684 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4686 rtx pat = XVECEXP (x, 0, i);
4687 if (GET_CODE (pat) == SET)
4689 update_set_flags (pat, &new_flags, &pred, &cond);
4690 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4692 else if (GET_CODE (pat) == USE
4693 || GET_CODE (pat) == CALL
4694 || GET_CODE (pat) == ASM_OPERANDS)
4695 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4696 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4699 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4701 rtx pat = XVECEXP (x, 0, i);
4702 if (GET_CODE (pat) == SET)
4704 if (GET_CODE (SET_SRC (pat)) != CALL)
4706 new_flags.is_write = 1;
4707 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4711 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4712 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4720 if (REGNO (x) == AR_UNAT_REGNUM)
4721 {
4722 for (i = 0; i < 64; ++i)
4723 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4724 }
4725 else
4726 need_barrier = rws_access_reg (x, flags, pred);
4730 /* Find the regs used in memory address computation. */
4731 new_flags.is_write = 0;
4732 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4735 case CONST_INT: case CONST_DOUBLE:
4736 case SYMBOL_REF: case LABEL_REF: case CONST:
4739 /* Operators with side-effects. */
4740 case POST_INC: case POST_DEC:
4741 if (GET_CODE (XEXP (x, 0)) != REG)
4742 abort ();
4744 new_flags.is_write = 0;
4745 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4746 new_flags.is_write = 1;
4747 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4751 if (GET_CODE (XEXP (x, 0)) != REG)
4752 abort ();
4754 new_flags.is_write = 0;
4755 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4756 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4757 new_flags.is_write = 1;
4758 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4761 /* Handle common unary and binary ops for efficiency. */
4762 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4763 case MOD: case UDIV: case UMOD: case AND: case IOR:
4764 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4765 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4766 case NE: case EQ: case GE: case GT: case LE:
4767 case LT: case GEU: case GTU: case LEU: case LTU:
4768 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4769 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4772 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4773 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4774 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4775 case SQRT: case FFS:
4776 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4780 switch (XINT (x, 1))
4782 case UNSPEC_LTOFF_DTPMOD:
4783 case UNSPEC_LTOFF_DTPREL:
4785 case UNSPEC_LTOFF_TPREL:
4787 case UNSPEC_PRED_REL_MUTEX:
4788 case UNSPEC_PIC_CALL:
4790 case UNSPEC_FETCHADD_ACQ:
4791 case UNSPEC_BSP_VALUE:
4792 case UNSPEC_FLUSHRS:
4793 case UNSPEC_BUNDLE_SELECTOR:
4796 case UNSPEC_GR_SPILL:
4797 case UNSPEC_GR_RESTORE:
4799 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4800 HOST_WIDE_INT bit = (offset >> 3) & 63;
4802 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4803 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
4804 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4809 case UNSPEC_FR_SPILL:
4810 case UNSPEC_FR_RESTORE:
4812 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4816 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4819 case UNSPEC_FR_RECIP_APPROX:
4820 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4821 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4824 case UNSPEC_CMPXCHG_ACQ:
4825 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4826 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4834 case UNSPEC_VOLATILE:
4835 switch (XINT (x, 1))
4838 /* Alloc must always be the first instruction of a group.
4839 We force this by always returning true. */
4840 /* ??? We might get better scheduling if we explicitly check for
4841 input/local/output register dependencies, and modify the
4842 scheduler so that alloc is always reordered to the start of
4843 the current group. We could then eliminate all of the
4844 first_instruction code. */
4845 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4847 new_flags.is_write = 1;
4848 rws_access_regno (REG_AR_CFM, new_flags, pred);
4851 case UNSPECV_SET_BSP:
4855 case UNSPECV_BLOCKAGE:
4856 case UNSPECV_INSN_GROUP_BARRIER:
4858 case UNSPECV_PSAC_ALL:
4859 case UNSPECV_PSAC_NORMAL:
4868 new_flags.is_write = 0;
4869 need_barrier = rws_access_regno (REG_RP, flags, pred);
4870 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4872 new_flags.is_write = 1;
4873 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4874 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4878 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4879 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4880 switch (format_ptr[i])
4882 case '0': /* unused field */
4883 case 'i': /* integer */
4884 case 'n': /* note */
4885 case 'w': /* wide integer */
4886 case 's': /* pointer to string */
4887 case 'S': /* optional pointer to string */
4888 break;
4890 case 'e': /* expression */
4891 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4892 need_barrier = 1;
4893 break;
4895 case 'E': /* rtx vector */
4896 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4897 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4898 need_barrier = 1;
4899 break;
4906 return need_barrier;
4909 /* Clear out the state for group_barrier_needed_p at the start of a
4910 sequence of insns. */
4913 init_insn_group_barriers ()
4915 memset (rws_sum, 0, sizeof (rws_sum));
4916 first_instruction = 1;
4919 /* Given the current state, recorded by previous calls to this function,
4920 determine whether a group barrier (a stop bit) is necessary before INSN.
4921 Return nonzero if so. */
4924 group_barrier_needed_p (insn)
4928 int need_barrier = 0;
4929 struct reg_flags flags;
4931 memset (&flags, 0, sizeof (flags));
4932 switch (GET_CODE (insn))
4938 /* A barrier doesn't imply an instruction group boundary. */
4942 memset (rws_insn, 0, sizeof (rws_insn));
4946 flags.is_branch = 1;
4947 flags.is_sibcall = SIBLING_CALL_P (insn);
4948 memset (rws_insn, 0, sizeof (rws_insn));
4950 /* Don't bundle a call following another call. */
4951 if ((pat = prev_active_insn (insn))
4952 && GET_CODE (pat) == CALL_INSN)
4953 {
4954 need_barrier = 1;
4955 break;
4956 }
4958 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4962 flags.is_branch = 1;
4964 /* Don't bundle a jump following a call. */
4965 if ((pat = prev_active_insn (insn))
4966 && GET_CODE (pat) == CALL_INSN)
4967 {
4968 need_barrier = 1;
4969 break;
4970 }
4972 /* FALLTHRU */
4974 if (GET_CODE (PATTERN (insn)) == USE
4975 || GET_CODE (PATTERN (insn)) == CLOBBER)
4976 /* Don't care about USE and CLOBBER "insns"---those are used to
4977 indicate to the optimizer that it shouldn't get rid of
4978 certain operations. */
4979 break;
4981 pat = PATTERN (insn);
4983 /* Ug. Hack hacks hacked elsewhere. */
4984 switch (recog_memoized (insn))
4986 /* We play dependency tricks with the epilogue in order
4987 to get proper schedules. Undo this for dv analysis. */
4988 case CODE_FOR_epilogue_deallocate_stack:
4989 case CODE_FOR_prologue_allocate_stack:
4990 pat = XVECEXP (pat, 0, 0);
4993 /* The pattern we use for br.cloop confuses the code above.
4994 The second element of the vector is representative. */
4995 case CODE_FOR_doloop_end_internal:
4996 pat = XVECEXP (pat, 0, 1);
4999 /* Doesn't generate code. */
5000 case CODE_FOR_pred_rel_mutex:
5001 case CODE_FOR_prologue_use:
5008 memset (rws_insn, 0, sizeof (rws_insn));
5009 need_barrier = rtx_needs_barrier (pat, flags, 0);
5011 /* Check to see if the previous instruction was a volatile
5012 asm. */
5013 if (! need_barrier)
5014 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5021 if (first_instruction)
5022 {
5023 need_barrier = 0;
5024 first_instruction = 0;
5025 }
5027 return need_barrier;
5030 /* Like group_barrier_needed_p, but do not clobber the current state. */
5033 safe_group_barrier_needed_p (insn)
5036 struct reg_write_state rws_saved[NUM_REGS];
5037 int saved_first_instruction;
5040 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5041 saved_first_instruction = first_instruction;
5043 t = group_barrier_needed_p (insn);
5045 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5046 first_instruction = saved_first_instruction;
5051 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
5052 as necessary to eliminate dependencies. This function assumes that
5053 a final instruction scheduling pass has been run which has already
5054 inserted most of the necessary stop bits. This function only inserts
5055 new ones at basic block boundaries, since these are invisible to the
5056 scheduler. */
5059 emit_insn_group_barriers (dump, insns)
5065 int insns_since_last_label = 0;
5067 init_insn_group_barriers ();
5069 for (insn = insns; insn; insn = NEXT_INSN (insn))
5071 if (GET_CODE (insn) == CODE_LABEL)
5073 if (insns_since_last_label)
5074 last_label = insn;
5075 insns_since_last_label = 0;
5077 else if (GET_CODE (insn) == NOTE
5078 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5080 if (insns_since_last_label)
5081 last_label = insn;
5082 insns_since_last_label = 0;
5084 else if (GET_CODE (insn) == INSN
5085 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5086 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5088 init_insn_group_barriers ();
5091 else if (INSN_P (insn))
5093 insns_since_last_label = 1;
5095 if (group_barrier_needed_p (insn))
5100 fprintf (dump, "Emitting stop before label %d\n",
5101 INSN_UID (last_label));
5102 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5105 init_insn_group_barriers ();
5113 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5114 This function has to emit all necessary group barriers. */
5117 emit_all_insn_group_barriers (dump, insns)
5118 FILE *dump ATTRIBUTE_UNUSED;
5123 init_insn_group_barriers ();
5125 for (insn = insns; insn; insn = NEXT_INSN (insn))
5127 if (GET_CODE (insn) == BARRIER)
5128 {
5129 rtx last = prev_active_insn (insn);
5131 if (! last)
5132 continue;
5133 if (GET_CODE (last) == JUMP_INSN
5134 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5135 last = prev_active_insn (last);
5136 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5137 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5139 init_insn_group_barriers ();
5141 else if (INSN_P (insn))
5143 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5144 init_insn_group_barriers ();
5145 else if (group_barrier_needed_p (insn))
5147 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5148 init_insn_group_barriers ();
5149 group_barrier_needed_p (insn);
5155 static int errata_find_address_regs PARAMS ((rtx *, void *));
5156 static void errata_emit_nops PARAMS ((rtx));
5157 static void fixup_errata PARAMS ((void));
5159 /* This structure is used to track some details about the previous insn
5160 groups so we can determine if it may be necessary to insert NOPs to
5161 work around hardware errata. */
5162 static struct group
5163 {
5164 HARD_REG_SET p_reg_set;
5165 HARD_REG_SET gr_reg_conditionally_set;
5166 } last_group[2];
5168 /* Index into the last_group array. */
5169 static int group_idx;
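/* Illustrative shape (an assumption, kept out of the build) of the kind
   of sequence the A/B-step workaround below guards against: a general
   register set under a predicate in one instruction group and then used
   as a memory address in the next group.  */
#if 0
	(p6) mov r14 = r20
	;;
	ld8 r15 = [r14]		/* r14 was conditionally set: emit nops.  */
#endif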
5171 /* Called through for_each_rtx; determines if a hard register that was
5172 conditionally set in the previous group is used as an address register.
5173 It ensures that for_each_rtx returns 1 in that case. */
5175 errata_find_address_regs (xp, data)
5177 void *data ATTRIBUTE_UNUSED;
5180 if (GET_CODE (x) != MEM)
5183 if (GET_CODE (x) == POST_MODIFY)
5185 if (GET_CODE (x) == REG)
5187 struct group *prev_group = last_group + (group_idx ^ 1);
5188 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5196 /* Called for each insn; this function keeps track of the state in
5197 last_group and emits additional NOPs if necessary to work around
5198 an Itanium A/B step erratum. */
5200 errata_emit_nops (insn)
5203 struct group *this_group = last_group + group_idx;
5204 struct group *prev_group = last_group + (group_idx ^ 1);
5205 rtx pat = PATTERN (insn);
5206 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5207 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5208 enum attr_type type;
5211 if (GET_CODE (real_pat) == USE
5212 || GET_CODE (real_pat) == CLOBBER
5213 || GET_CODE (real_pat) == ASM_INPUT
5214 || GET_CODE (real_pat) == ADDR_VEC
5215 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5216 || asm_noperands (PATTERN (insn)) >= 0)
5219 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5220 part of it here. */
5221 set = real_pat;
5222 if (GET_CODE (set) == PARALLEL)
5225 set = XVECEXP (real_pat, 0, 0);
5226 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5227 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5228 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5235 if (set && GET_CODE (set) != SET)
5238 type = get_attr_type (insn);
5241 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5242 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5244 if ((type == TYPE_M || type == TYPE_A) && cond && set
5245 && REG_P (SET_DEST (set))
5246 && GET_CODE (SET_SRC (set)) != PLUS
5247 && GET_CODE (SET_SRC (set)) != MINUS
5248 && (GET_CODE (SET_SRC (set)) != ASHIFT
5249 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5250 && (GET_CODE (SET_SRC (set)) != MEM
5251 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5252 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5254 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5255 || ! REG_P (XEXP (cond, 0)))
5258 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5259 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5261 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5263 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5264 emit_insn_before (gen_nop (), insn);
5265 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5267 memset (last_group, 0, sizeof last_group);
5271 /* Emit extra nops if they are required to work around hardware errata. */
5278 if (! TARGET_B_STEP)
5279 return;
5281 group_idx = 0;
5282 memset (last_group, 0, sizeof last_group);
5284 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5289 if (ia64_safe_type (insn) == TYPE_S)
5290 {
5291 group_idx ^= 1;
5292 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5293 }
5294 else
5295 errata_emit_nops (insn);
5299 /* Instruction scheduling support. */
5300 /* Describe one bundle. */
5302 struct bundle
5303 {
5304 /* Zero if there's no possibility of a stop in this bundle other than
5305 at the end, otherwise the position of the optional stop bit. */
5306 int possible_stop;
5307 /* The types of the three slots. */
5308 enum attr_type t[3];
5309 /* The pseudo op to be emitted into the assembler output. */
5310 const char *name;
5311 };
5313 #define NR_BUNDLES 10
5315 /* A list of all available bundles. */
5317 static const struct bundle bundle[NR_BUNDLES] =
5318 {
5319 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5320 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5321 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5322 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5323 #if NR_BUNDLES == 10
5324 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5325 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5326 #endif
5327 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5328 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5329 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5330 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5331 it matches an L type insn. Otherwise we'll try to generate L type
5332 nops. */
5333 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5334 };
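/* As a reading of the table above (illustrative; it matches the IA-64
   template encodings): .mii with possible_stop == 2 corresponds to the
   MI;;I template, and .mmi with possible_stop == 1 to M;;MI.  */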
5336 /* Describe a packet of instructions. Packets consist of two bundles that
5337 are visible to the hardware in one scheduling window. */
5339 struct ia64_packet
5340 {
5341 const struct bundle *t1, *t2;
5342 /* Precomputed value of the first split issue in this packet if a cycle
5343 starts at its beginning. */
5344 int first_split;
5345 /* For convenience, the insn types are replicated here so we don't have
5346 to go through T1 and T2 all the time. */
5347 enum attr_type t[6];
5348 };
5350 /* An array containing all possible packets. */
5351 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5352 static struct ia64_packet packets[NR_PACKETS];
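/* A small sketch (an assumption, kept out of the build) of the indexing
   invariant established in ia64_sched_init below: the packet pairing
   bundle B1 with bundle B2 lives at index B1 * NR_BUNDLES + B2.  */
#if 0
static const struct ia64_packet *
toy_lookup_packet (int b1, int b2)
{
  return &packets[b1 * NR_BUNDLES + b2];
}
#endif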
5354 /* Map attr_type to a string with the name. */
5356 static const char *const type_names[] =
5358 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5361 /* Nonzero if we should insert stop bits into the schedule. */
5362 int ia64_final_schedule = 0;
5364 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5365 static rtx ia64_single_set PARAMS ((rtx));
5366 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5367 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5368 static void maybe_rotate PARAMS ((FILE *));
5369 static void finish_last_head PARAMS ((FILE *, int));
5370 static void rotate_one_bundle PARAMS ((FILE *));
5371 static void rotate_two_bundles PARAMS ((FILE *));
5372 static void nop_cycles_until PARAMS ((int, FILE *));
5373 static void cycle_end_fill_slots PARAMS ((FILE *));
5374 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5375 static int get_split PARAMS ((const struct ia64_packet *, int));
5376 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5377 const struct ia64_packet *, int));
5378 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5379 rtx *, enum attr_type *, int));
5380 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5381 static void dump_current_packet PARAMS ((FILE *));
5382 static void schedule_stop PARAMS ((FILE *));
5383 static rtx gen_nop_type PARAMS ((enum attr_type));
5384 static void ia64_emit_nops PARAMS ((void));
5386 /* Map a bundle number to its pseudo-op. */
5392 return bundle[b].name;
5395 /* Compute the slot which will cause a split issue in packet P if the
5396 current cycle begins at slot BEGIN. */
5399 itanium_split_issue (p, begin)
5400 const struct ia64_packet *p;
5403 int type_count[TYPE_S];
5409 /* Always split before and after MMF. */
5410 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5412 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5414 /* Always split after MBB and BBB. */
5415 if (p->t[1] == TYPE_B)
5417 /* Split after first bundle in MIB BBB combination. */
5418 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5422 memset (type_count, 0, sizeof type_count);
5423 for (i = begin; i < split; i++)
5425 enum attr_type t0 = p->t[i];
5426 /* An MLX bundle reserves the same units as an MFI bundle. */
5427 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5428 : t0 == TYPE_X ? TYPE_I
5431 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5432 2 integer per cycle. */
5433 int max = (t == TYPE_B ? 3 : 2);
5434 if (type_count[t] == max)
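/* Worked example of the computation above (illustrative): for an
   .mii/.mii packet with the cycle starting at slot 0, slots 0-3 fit
   (one M, two I, one M), but the third I at slot 4 would need a third
   I unit, so the split issue falls at slot 4.  */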
5442 /* Return the maximum number of instructions a cpu can issue. */
5450 /* Helper function - like single_set, but look inside COND_EXEC. */
5453 ia64_single_set (insn)
5456 rtx x = PATTERN (insn), ret;
5457 if (GET_CODE (x) == COND_EXEC)
5458 x = COND_EXEC_CODE (x);
5459 if (GET_CODE (x) == SET)
5462 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5463 Although they are not classical single set, the second set is there just
5464 to protect it from moving past FP-relative stack accesses. */
5465 switch (recog_memoized (insn))
5467 case CODE_FOR_prologue_allocate_stack:
5468 case CODE_FOR_epilogue_deallocate_stack:
5469 ret = XVECEXP (x, 0, 0);
5473 ret = single_set_2 (insn, x);
5480 /* Adjust the cost of a scheduling dependency. Return the new cost of
5481 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5484 ia64_adjust_cost (insn, link, dep_insn, cost)
5485 rtx insn, link, dep_insn;
5488 enum attr_type dep_type;
5489 enum attr_itanium_class dep_class;
5490 enum attr_itanium_class insn_class;
5491 rtx dep_set, set, src, addr;
5493 if (GET_CODE (PATTERN (insn)) == CLOBBER
5494 || GET_CODE (PATTERN (insn)) == USE
5495 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5496 || GET_CODE (PATTERN (dep_insn)) == USE
5497 /* @@@ Not accurate for indirect calls. */
5498 || GET_CODE (insn) == CALL_INSN
5499 || ia64_safe_type (insn) == TYPE_S)
5502 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5503 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5506 dep_type = ia64_safe_type (dep_insn);
5507 dep_class = ia64_safe_itanium_class (dep_insn);
5508 insn_class = ia64_safe_itanium_class (insn);
5510 /* Compares that feed a conditional branch can execute in the same
5511 cycle. */
5512 dep_set = ia64_single_set (dep_insn);
5513 set = ia64_single_set (insn);
5515 if (dep_type != TYPE_F
5517 && GET_CODE (SET_DEST (dep_set)) == REG
5518 && PR_REG (REGNO (SET_DEST (dep_set)))
5519 && GET_CODE (insn) == JUMP_INSN)
5522 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5524 /* ??? Can't find any information in the documentation about whether
5525 a sequence
5526 st [rx] = ra
5527 ld rb = [ry]
5528 splits issue. Assume it doesn't. */
5532 src = set ? SET_SRC (set) : 0;
5536 if (GET_CODE (SET_DEST (set)) == MEM)
5537 addr = XEXP (SET_DEST (set), 0);
5538 else if (GET_CODE (SET_DEST (set)) == SUBREG
5539 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5540 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5544 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5545 addr = XVECEXP (addr, 0, 0);
5546 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5547 addr = XEXP (addr, 0);
5548 if (GET_CODE (addr) == MEM)
5549 addr = XEXP (addr, 0);
5555 if (addr && GET_CODE (addr) == POST_MODIFY)
5556 addr = XEXP (addr, 0);
5558 set = ia64_single_set (dep_insn);
5560 if ((dep_class == ITANIUM_CLASS_IALU
5561 || dep_class == ITANIUM_CLASS_ILOG
5562 || dep_class == ITANIUM_CLASS_LD)
5563 && (insn_class == ITANIUM_CLASS_LD
5564 || insn_class == ITANIUM_CLASS_ST))
5566 if (! addr || ! set)
5568 /* This isn't completely correct - an IALU that feeds an address has
5569 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5570 otherwise. Unfortunately there's no good way to describe this. */
5571 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5575 if ((dep_class == ITANIUM_CLASS_IALU
5576 || dep_class == ITANIUM_CLASS_ILOG
5577 || dep_class == ITANIUM_CLASS_LD)
5578 && (insn_class == ITANIUM_CLASS_MMMUL
5579 || insn_class == ITANIUM_CLASS_MMSHF
5580 || insn_class == ITANIUM_CLASS_MMSHFI))
5583 if (dep_class == ITANIUM_CLASS_FMAC
5584 && (insn_class == ITANIUM_CLASS_FMISC
5585 || insn_class == ITANIUM_CLASS_FCVTFX
5586 || insn_class == ITANIUM_CLASS_XMPY))
5589 if ((dep_class == ITANIUM_CLASS_FMAC
5590 || dep_class == ITANIUM_CLASS_FMISC
5591 || dep_class == ITANIUM_CLASS_FCVTFX
5592 || dep_class == ITANIUM_CLASS_XMPY)
5593 && insn_class == ITANIUM_CLASS_STF)
5596 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5597 but HP engineers say any non-MM operation. */
5598 if ((dep_class == ITANIUM_CLASS_MMMUL
5599 || dep_class == ITANIUM_CLASS_MMSHF
5600 || dep_class == ITANIUM_CLASS_MMSHFI)
5601 && insn_class != ITANIUM_CLASS_MMMUL
5602 && insn_class != ITANIUM_CLASS_MMSHF
5603 && insn_class != ITANIUM_CLASS_MMSHFI)
5609 /* Describe the current state of the Itanium pipeline. */
5610 static struct
5611 {
5612 /* The first slot that is used in the current cycle. */
5613 int first_slot;
5614 /* The next slot to fill. */
5615 int cur;
5616 /* The packet we have selected for the current issue window. */
5617 const struct ia64_packet *packet;
5618 /* The position of the split issue that occurs due to issue width
5619 limitations (6 if there's no split issue). */
5620 int split;
5621 /* Record data about the insns scheduled so far in the same issue
5622 window. The elements up to but not including FIRST_SLOT belong
5623 to the previous cycle, the ones starting with FIRST_SLOT belong
5624 to the current cycle. */
5625 enum attr_type types[6];
5626 rtx insns[6];
5627 int stopbit[6];
5628 /* Nonzero if we decided to schedule a stop bit. */
5629 int last_was_stop;
5630 } sched_data;
5632 /* Temporary arrays; they have enough elements to hold all insns that
5633 can be ready at the same time while scheduling the current block.
5634 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5635 static rtx *sched_ready;
5636 static enum attr_type *sched_types;
5638 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5639 of packet P. */
5642 insn_matches_slot (p, itype, slot, insn)
5643 const struct ia64_packet *p;
5644 enum attr_type itype;
5648 enum attr_itanium_requires_unit0 u0;
5649 enum attr_type stype = p->t[slot];
5653 u0 = ia64_safe_itanium_requires_unit0 (insn);
5654 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5657 for (i = sched_data.first_slot; i < slot; i++)
5658 if (p->t[i] == stype
5659 || (stype == TYPE_F && p->t[i] == TYPE_L)
5660 || (stype == TYPE_I && p->t[i] == TYPE_X))
5663 if (GET_CODE (insn) == CALL_INSN)
5665 /* Reject calls in multiway branch packets. We want to limit
5666 the number of multiway branches we generate (since the branch
5667 predictor is limited), and this seems to work fairly well.
5668 (If we didn't do this, we'd have to add another test here to
5669 force calls into the third slot of the bundle.) */
5672 if (p->t[1] == TYPE_B)
5677 if (p->t[4] == TYPE_B)
5685 if (itype == TYPE_A)
5686 return stype == TYPE_M || stype == TYPE_I;
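/* For example, an A-type insn (a simple add) may occupy either the M or
   the I slot of an .mii bundle, whereas an L-type insn (e.g. movl) needs
   the L+X pair of .mlx, which reserves the F and I units.  */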
5690 /* Like emit_insn_before, but skip cycle_display notes.
5691 ??? When cycle display notes are implemented, update this. */
5694 ia64_emit_insn_before (insn, before)
5697 emit_insn_before (insn, before);
5700 /* When rotating a bundle out of the issue window, insert a bundle selector
5701 insn in front of it. DUMP is the scheduling dump file or NULL. START
5702 is either 0 or 3, depending on whether we want to emit a bundle selector
5703 for the first bundle or the second bundle in the current issue window.
5705 The selector insns are emitted this late because the selected packet can
5706 be changed until parts of it get rotated out. */
5709 finish_last_head (dump, start)
5713 const struct ia64_packet *p = sched_data.packet;
5714 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5715 int bundle_type = b - bundle;
5719 if (! ia64_final_schedule)
5722 for (i = start; sched_data.insns[i] == 0; i++)
5725 insn = sched_data.insns[i];
5728 fprintf (dump, "// Emitting template before %d: %s\n",
5729 INSN_UID (insn), b->name);
5731 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5734 /* We can't schedule more insns this cycle. Fix up the scheduling state
5735 and advance FIRST_SLOT and CUR.
5736 We have to distribute the insns that are currently found between
5737 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5738 far, they are stored successively in the fields starting at FIRST_SLOT;
5739 now they must be moved to the correct slots.
5740 DUMP is the current scheduling dump file, or NULL. */
5743 cycle_end_fill_slots (dump)
5746 const struct ia64_packet *packet = sched_data.packet;
5748 enum attr_type tmp_types[6];
5751 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5752 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5754 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5756 enum attr_type t = tmp_types[i];
5757 if (t != ia64_safe_type (tmp_insns[i]))
5758 abort ();
5759 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5761 if (slot > sched_data.split)
5762 abort ();
5763 if (dump)
5764 fprintf (dump, "// Packet needs %s, have %s\n",
5765 type_names[packet->t[slot]], type_names[t]);
5766 sched_data.types[slot] = packet->t[slot];
5767 sched_data.insns[slot] = 0;
5768 sched_data.stopbit[slot] = 0;
5770 /* ??? TYPE_L instructions always fill up two slots, but we don't
5771 support TYPE_L nops. */
5772 if (packet->t[slot] == TYPE_L)
5778 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5779 actual slot type later. */
5780 sched_data.types[slot] = packet->t[slot];
5781 sched_data.insns[slot] = tmp_insns[i];
5782 sched_data.stopbit[slot] = 0;
5785 /* TYPE_L instructions always fill up two slots. */
5788 sched_data.types[slot] = packet->t[slot];
5789 sched_data.insns[slot] = 0;
5790 sched_data.stopbit[slot] = 0;
5795 /* This isn't right - there's no need to pad out until the forced split;
5796 the CPU will automatically split if an insn isn't ready. */
5798 while (slot < sched_data.split)
5800 sched_data.types[slot] = packet->t[slot];
5801 sched_data.insns[slot] = 0;
5802 sched_data.stopbit[slot] = 0;
5807 sched_data.first_slot = sched_data.cur = slot;
5810 /* Bundle rotations, as described in the Itanium optimization manual.
5811 We can rotate either one or both bundles out of the issue window.
5812 DUMP is the current scheduling dump file, or NULL. */
5815 rotate_one_bundle (dump)
5819 fprintf (dump, "// Rotating one bundle.\n");
5821 finish_last_head (dump, 0);
5822 if (sched_data.cur > 3)
5824 sched_data.cur -= 3;
5825 sched_data.first_slot -= 3;
5826 memmove (sched_data.types,
5827 sched_data.types + 3,
5828 sched_data.cur * sizeof *sched_data.types);
5829 memmove (sched_data.stopbit,
5830 sched_data.stopbit + 3,
5831 sched_data.cur * sizeof *sched_data.stopbit);
5832 memmove (sched_data.insns,
5833 sched_data.insns + 3,
5834 sched_data.cur * sizeof *sched_data.insns);
5836 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
5841 sched_data.first_slot = 0;
5846 rotate_two_bundles (dump)
5850 fprintf (dump, "// Rotating two bundles.\n");
5852 if (sched_data.cur == 0)
5855 finish_last_head (dump, 0);
5856 if (sched_data.cur > 3)
5857 finish_last_head (dump, 3);
5859 sched_data.first_slot = 0;
5862 /* We're beginning a new block. Initialize data structures as necessary. */
5865 ia64_sched_init (dump, sched_verbose, max_ready)
5866 FILE *dump ATTRIBUTE_UNUSED;
5867 int sched_verbose ATTRIBUTE_UNUSED;
5870 static int initialized = 0;
5878 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5880 const struct bundle *t1 = bundle + b1;
5881 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5883 const struct bundle *t2 = bundle + b2;
5889 for (i = 0; i < NR_PACKETS; i++)
5892 for (j = 0; j < 3; j++)
5893 packets[i].t[j] = packets[i].t1->t[j];
5894 for (j = 0; j < 3; j++)
5895 packets[i].t[j + 3] = packets[i].t2->t[j];
5896 packets[i].first_split = itanium_split_issue (packets + i, 0);
5901 init_insn_group_barriers ();
5903 memset (&sched_data, 0, sizeof sched_data);
5904 sched_types = (enum attr_type *) xmalloc (max_ready
5905 * sizeof (enum attr_type));
5906 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5909 /* See if the packet P can match the insns we have already scheduled. Return
5910 nonzero if so. In *PSLOT, we store the first slot that is available for
5911 more instructions if we choose this packet.
5912 SPLIT holds the last slot we can use, there's a split issue after it so
5913 scheduling beyond it would cause us to use more than one cycle. */
5916 packet_matches_p (p, split, pslot)
5917 const struct ia64_packet *p;
5921 int filled = sched_data.cur;
5922 int first = sched_data.first_slot;
5925 /* First, check if the first of the two bundles must be a specific one (due
5927 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5929 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5932 for (i = 0; i < first; i++)
5933 if (! insn_matches_slot (p, sched_data.types[i], i,
5934 sched_data.insns[i]))
5936 for (i = slot = first; i < filled; i++)
5938 while (slot < split)
5940 if (insn_matches_slot (p, sched_data.types[i], slot,
5941 sched_data.insns[i]))
5955 /* A frontend for itanium_split_issue. For a packet P and a slot
5956 number FIRST that describes the start of the current clock cycle,
5957 return the slot number of the first split issue. This function
5958 uses the cached number found in P if possible. */
5961 get_split (p, first)
5962 const struct ia64_packet *p;
5966 return p->first_split;
5967 return itanium_split_issue (p, first);
5970 /* Given N_READY insns in the array READY, whose types are found in the
5971 corresponding array TYPES, return the insn that is best suited to be
5972 scheduled in slot SLOT of packet P. */
5975 find_best_insn (ready, types, n_ready, p, slot)
5977 enum attr_type *types;
5979 const struct ia64_packet *p;
5984 while (n_ready-- > 0)
5986 rtx insn = ready[n_ready];
5989 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5990 break;
5991 /* If we have equally good insns, one of which has a stricter
5992 slot requirement, prefer the one with the stricter requirement. */
5993 if (best >= 0 && types[n_ready] == TYPE_A)
5994 continue;
5995 if (insn_matches_slot (p, types[n_ready], slot, insn))
5996 {
5997 best = n_ready;
5998 best_pri = INSN_PRIORITY (ready[best]);
6000 /* If there's no way we could get a stricter requirement, stop
6001 looking now. */
6002 if (types[n_ready] != TYPE_A
6003 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6004 break;
6011 /* Select the best packet to use given the current scheduler state and the
6013 READY is an array holding N_READY ready insns; TYPES is a corresponding
6014 array that holds their types. Store the best packet in *PPACKET and the
6015 number of insns that can be scheduled in the current cycle in *PBEST. */
6018 find_best_packet (pbest, ppacket, ready, types, n_ready)
6020 const struct ia64_packet **ppacket;
6022 enum attr_type *types;
6025 int first = sched_data.first_slot;
6028 const struct ia64_packet *best_packet = NULL;
6031 for (i = 0; i < NR_PACKETS; i++)
6033 const struct ia64_packet *p = packets + i;
6035 int split = get_split (p, first);
6037 int first_slot, last_slot;
6040 if (! packet_matches_p (p, split, &first_slot))
6043 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6047 for (slot = first_slot; slot < split; slot++)
6051 /* Disallow a degenerate case where the first bundle doesn't
6052 contain anything but NOPs! */
6053 if (first_slot == 0 && win == 0 && slot == 3)
6059 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6062 sched_ready[insn_nr] = 0;
6066 else if (p->t[slot] == TYPE_B)
6069 /* We must disallow MBB/BBB packets if any of their B slots would be
6070 filled with nops. */
6073 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6078 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6083 || (win == best && last_slot < lowest_end))
6086 lowest_end = last_slot;
6091 *ppacket = best_packet;
6094 /* Reorder the ready list so that the insns that can be issued in this cycle
6095 are found in the correct order at the end of the list.
6096 DUMP is the scheduling dump file, or NULL. READY points to the start,
6097 E_READY to the end of the ready list. MAY_FAIL determines what should be
6098 done if no insns can be scheduled in this cycle: if it is zero, we abort,
6099 otherwise we return 0.
6100 Return 1 if any insns can be scheduled in this cycle. */
6103 itanium_reorder (dump, ready, e_ready, may_fail)
6109 const struct ia64_packet *best_packet;
6110 int n_ready = e_ready - ready;
6111 int first = sched_data.first_slot;
6112 int i, best, best_split, filled;
6114 for (i = 0; i < n_ready; i++)
6115 sched_types[i] = ia64_safe_type (ready[i]);
6117 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6128 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6129 best_packet->t1->name,
6130 best_packet->t2 ? best_packet->t2->name : NULL, best);
6133 best_split = itanium_split_issue (best_packet, first);
6134 packet_matches_p (best_packet, best_split, &filled);
6136 for (i = filled; i < best_split; i++)
6140 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6143 rtx insn = ready[insn_nr];
6144 memmove (ready + insn_nr, ready + insn_nr + 1,
6145 (n_ready - insn_nr - 1) * sizeof (rtx));
6146 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6147 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6148 ready[--n_ready] = insn;
6152 sched_data.packet = best_packet;
6153 sched_data.split = best_split;
6157 /* Dump information about the current scheduling state to file DUMP. */
6160 dump_current_packet (dump)
6164 fprintf (dump, "// %d slots filled:", sched_data.cur);
6165 for (i = 0; i < sched_data.first_slot; i++)
6167 rtx insn = sched_data.insns[i];
6168 fprintf (dump, " %s", type_names[sched_data.types[i]]);
6170 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6171 if (sched_data.stopbit[i])
6172 fprintf (dump, " ;;");
6174 fprintf (dump, " :::");
6175 for (i = sched_data.first_slot; i < sched_data.cur; i++)
6177 rtx insn = sched_data.insns[i];
6178 enum attr_type t = ia64_safe_type (insn);
6179 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6181 fprintf (dump, "\n");
6184 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
6188 schedule_stop (dump)
6191 const struct ia64_packet *best = sched_data.packet;
6196 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6198 if (sched_data.cur == 0)
6201 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6203 rotate_two_bundles (NULL);
6207 for (i = -1; i < NR_PACKETS; i++)
6209 /* This is a slight hack to give the current packet the first chance.
6210 This is done to avoid e.g. switching from MIB to MBB bundles. */
6211 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6212 int split = get_split (p, sched_data.first_slot);
6213 const struct bundle *compare;
6216 if (! packet_matches_p (p, split, &next))
6217 continue;
6219 compare = next > 3 ? p->t2 : p->t1;
6222 if (compare->possible_stop)
6223 stoppos = compare->possible_stop;
6227 if (stoppos < next || stoppos >= best_stop)
6229 if (compare->possible_stop == 0)
6231 stoppos = (next > 3 ? 6 : 3);
6233 if (stoppos < next || stoppos >= best_stop)
6237 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6238 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6241 best_stop = stoppos;
6245 sched_data.packet = best;
6246 cycle_end_fill_slots (dump);
6247 while (sched_data.cur < best_stop)
6249 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6250 sched_data.insns[sched_data.cur] = 0;
6251 sched_data.stopbit[sched_data.cur] = 0;
6254 sched_data.stopbit[sched_data.cur - 1] = 1;
6255 sched_data.first_slot = best_stop;
6258 dump_current_packet (dump);
6261 /* If necessary, perform one or two rotations on the scheduling state.
6262 This should only be called if we are starting a new cycle. */
6268 cycle_end_fill_slots (dump);
6269 if (sched_data.cur == 6)
6270 rotate_two_bundles (dump);
6271 else if (sched_data.cur >= 3)
6272 rotate_one_bundle (dump);
6273 sched_data.first_slot = sched_data.cur;
6276 /* The clock cycle when ia64_sched_reorder was last called. */
6277 static int prev_cycle;
6279 /* The first insn scheduled in the previous cycle. This is the saved
6280 value of sched_data.first_slot. */
6281 static int prev_first;
6283 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6284 pad out the delay between MM (shifts, etc.) and integer operations. */
6287 nop_cycles_until (clock_var, dump)
6291 int prev_clock = prev_cycle;
6292 int cycles_left = clock_var - prev_clock;
6293 bool did_stop = false;
6295 /* Finish the previous cycle; pad it out with NOPs. */
6296 if (sched_data.cur == 3)
6298 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6300 maybe_rotate (dump);
6302 else if (sched_data.cur > 0)
6303 {
6304 int need_stop = 0;
6305 int split = itanium_split_issue (sched_data.packet, prev_first);
6307 if (sched_data.cur < 3 && split > 3)
6308 {
6309 split = 3;
6310 need_stop = 1;
6311 }
6313 if (split > sched_data.cur)
6316 for (i = sched_data.cur; i < split; i++)
6318 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6319 sched_data.types[i] = sched_data.packet->t[i];
6320 sched_data.insns[i] = t;
6321 sched_data.stopbit[i] = 0;
6323 sched_data.cur = split;
6326 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6330 for (i = sched_data.cur; i < 6; i++)
6332 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6333 sched_data.types[i] = sched_data.packet->t[i];
6334 sched_data.insns[i] = t;
6335 sched_data.stopbit[i] = 0;
6342 if (need_stop || sched_data.cur == 6)
6344 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6347 maybe_rotate (dump);
6351 while (cycles_left > 0)
6352 {
6353 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6354 sched_emit_insn (gen_nop_type (TYPE_M));
6355 sched_emit_insn (gen_nop_type (TYPE_I));
6356 if (cycles_left > 1)
6357 {
6358 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6359 cycles_left--;
6360 }
6361 sched_emit_insn (gen_nop_type (TYPE_I));
6362 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6368 init_insn_group_barriers ();
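/* The padding emitted by the loop above amounts, per idle cycle, to
   roughly this assembly (illustrative):
	.mii
	nop.m 0
	nop.i 0
	;;	(emitted mid-bundle while more than one cycle remains)
	nop.i 0
	;;
 */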
6371 /* We are about to begin issuing insns for this clock cycle.
6372 Override the default sort algorithm to better slot instructions. */
6375 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6376 reorder_type, clock_var)
6377 FILE *dump ATTRIBUTE_UNUSED;
6378 int sched_verbose ATTRIBUTE_UNUSED;
6381 int reorder_type, clock_var;
6384 int n_ready = *pn_ready;
6385 rtx *e_ready = ready + n_ready;
6390 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6391 dump_current_packet (dump);
6394 /* Work around the pipeline flush that will occur if the results of
6395 an MM instruction are accessed before the result is ready. Intel
6396 documentation says this only happens with IALU, ISHF, ILOG, LD,
6397 and ST consumers, but experimental evidence shows that *any* non-MM
6398 type instruction will incur the flush. */
6399 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6401 for (insnp = ready; insnp < e_ready; insnp++)
6403 rtx insn = *insnp, link;
6404 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6406 if (t == ITANIUM_CLASS_MMMUL
6407 || t == ITANIUM_CLASS_MMSHF
6408 || t == ITANIUM_CLASS_MMSHFI)
6411 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6412 if (REG_NOTE_KIND (link) == 0)
6414 rtx other = XEXP (link, 0);
6415 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6416 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6418 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6426 prev_first = sched_data.first_slot;
6427 prev_cycle = clock_var;
6429 if (reorder_type == 0)
6430 maybe_rotate (sched_verbose ? dump : NULL);
6432 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6434 for (insnp = ready; insnp < e_ready; insnp++)
6435 if (insnp < e_ready)
6438 enum attr_type t = ia64_safe_type (insn);
6439 if (t == TYPE_UNKNOWN)
6441 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6442 || asm_noperands (PATTERN (insn)) >= 0)
6444 rtx lowest = ready[n_asms];
6445 ready[n_asms] = insn;
6451 rtx highest = ready[n_ready - 1];
6452 ready[n_ready - 1] = insn;
6454 if (ia64_final_schedule && group_barrier_needed_p (insn))
6456 schedule_stop (sched_verbose ? dump : NULL);
6457 sched_data.last_was_stop = 1;
6458 maybe_rotate (sched_verbose ? dump : NULL);
6465 if (n_asms < n_ready)
6467 /* Some normal insns to process. Skip the asms. */
6471 else if (n_ready > 0)
6473 /* Only asm insns left. */
6474 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6476 schedule_stop (sched_verbose ? dump : NULL);
6477 sched_data.last_was_stop = 1;
6478 maybe_rotate (sched_verbose ? dump : NULL);
6480 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6484 if (ia64_final_schedule)
6486 int nr_need_stop = 0;
6488 for (insnp = ready; insnp < e_ready; insnp++)
6489 if (safe_group_barrier_needed_p (*insnp))
6490 nr_need_stop++;
6492 /* Schedule a stop bit if
6493 - all insns require a stop bit, or
6494 - we are starting a new cycle and _any_ insns require a stop bit.
6495 The reason for the latter is that if our schedule is accurate, then
6496 the additional stop won't decrease performance at this point (since
6497 there's a split issue at this point anyway), but it gives us more
6498 freedom when scheduling the currently ready insns. */
6499 if ((reorder_type == 0 && nr_need_stop)
6500 || (reorder_type == 1 && n_ready == nr_need_stop))
6502 schedule_stop (sched_verbose ? dump : NULL);
6503 sched_data.last_was_stop = 1;
6504 maybe_rotate (sched_verbose ? dump : NULL);
6505 if (reorder_type == 1)
6512 /* Move down everything that needs a stop bit, preserving relative
6513 order. */
6514 while (insnp-- > ready + deleted)
6515 while (insnp >= ready + deleted)
6518 if (! safe_group_barrier_needed_p (insn))
6519 break;
6520 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6526 if (deleted != nr_need_stop)
6527 abort ();
6531 return itanium_reorder (sched_verbose ? dump : NULL,
6532 ready, e_ready, reorder_type == 1);
6536 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6543 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6544 pn_ready, 0, clock_var);
6547 /* Like ia64_sched_reorder, but called after issuing each insn.
6548 Override the default sort algorithm to better slot instructions. */
6551 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6552 FILE *dump ATTRIBUTE_UNUSED;
6553 int sched_verbose ATTRIBUTE_UNUSED;
6558 if (sched_data.last_was_stop)
6561 /* Detect one special case and try to optimize it.
6562 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6563 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6564 if (sched_data.first_slot == 1
6565 && sched_data.stopbit[0]
6566 && ((sched_data.cur == 4
6567 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6568 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6569 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6570 || (sched_data.cur == 3
6571 && (sched_data.types[1] == TYPE_M
6572 || sched_data.types[1] == TYPE_A)
6573 && (sched_data.types[2] != TYPE_M
6574 && sched_data.types[2] != TYPE_I
6575 && sched_data.types[2] != TYPE_A))))
6579 rtx stop = sched_data.insns[1];
6581 /* Search backward for the stop bit that must be there. */
6586 stop = PREV_INSN (stop);
6587 if (GET_CODE (stop) != INSN)
6588 abort ();
6589 insn_code = recog_memoized (stop);
6591 /* Ignore .pred.rel.mutex.
6593 ??? Update this to ignore cycle display notes too
6594 ??? once those are implemented */
6595 if (insn_code == CODE_FOR_pred_rel_mutex
6596 || insn_code == CODE_FOR_prologue_use)
6599 if (insn_code == CODE_FOR_insn_group_barrier)
6604 /* Adjust the stop bit's slot selector. */
6605 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6606 abort ();
6607 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6609 sched_data.stopbit[0] = 0;
6610 sched_data.stopbit[2] = 1;
6612 sched_data.types[5] = sched_data.types[3];
6613 sched_data.types[4] = sched_data.types[2];
6614 sched_data.types[3] = sched_data.types[1];
6615 sched_data.insns[5] = sched_data.insns[3];
6616 sched_data.insns[4] = sched_data.insns[2];
6617 sched_data.insns[3] = sched_data.insns[1];
6618 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6619 sched_data.cur += 2;
6620 sched_data.first_slot = 3;
6621 for (i = 0; i < NR_PACKETS; i++)
6623 const struct ia64_packet *p = packets + i;
6624 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6626 sched_data.packet = p;
6630 rotate_one_bundle (sched_verbose ? dump : NULL);
6633 for (i = 0; i < NR_PACKETS; i++)
6635 const struct ia64_packet *p = packets + i;
6636 int split = get_split (p, sched_data.first_slot);
6639 /* Disallow multiway branches here. */
6640 if (p->t[1] == TYPE_B)
6643 if (packet_matches_p (p, split, &next) && next < best)
6646 sched_data.packet = p;
6647 sched_data.split = split;
6656 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6661 /* Did we schedule a stop? If so, finish this cycle. */
6662 if (sched_data.cur == sched_data.first_slot)
6667 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6669 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6671 dump_current_packet (dump);
6675 /* We are about to issue INSN. Return the number of insns left on the
6676 ready queue that can be issued this cycle. */
6679 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6683 int can_issue_more ATTRIBUTE_UNUSED;
6685 enum attr_type t = ia64_safe_type (insn);
6687 if (sched_data.last_was_stop)
6689 int t = sched_data.first_slot;
6692 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6693 init_insn_group_barriers ();
6694 sched_data.last_was_stop = 0;
6697 if (t == TYPE_UNKNOWN)
6700 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6701 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6702 || asm_noperands (PATTERN (insn)) >= 0)
6704 /* This must be some kind of asm. Clear the scheduling state. */
6705 rotate_two_bundles (sched_verbose ? dump : NULL);
6706 if (ia64_final_schedule)
6707 group_barrier_needed_p (insn);
6712 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6713 important state info. Don't delete this test. */
6714 if (ia64_final_schedule
6715 && group_barrier_needed_p (insn))
6716 abort ();
6718 sched_data.stopbit[sched_data.cur] = 0;
6719 sched_data.insns[sched_data.cur] = insn;
6720 sched_data.types[sched_data.cur] = t;
6724 fprintf (dump, "// Scheduling insn %d of type %s\n",
6725 INSN_UID (insn), type_names[t]);
6727 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6729 schedule_stop (sched_verbose ? dump : NULL);
6730 sched_data.last_was_stop = 1;
6736 /* Free data allocated by ia64_sched_init. */
6739 ia64_sched_finish (dump, sched_verbose)
6744 fprintf (dump, "// Finishing schedule.\n");
6745 rotate_two_bundles (NULL);
6750 /* Emit pseudo-ops for the assembler to describe predicate relations.
6751 At present this assumes that we only consider predicate pairs to
6752 be mutex, and that the assembler can deduce proper values from
6753 straight-line code. */
6756 emit_predicate_relation_info ()
6760 FOR_EACH_BB_REVERSE (bb)
{
      rtx head = bb->head;
6765 /* We only need such notes at code labels. */
if (GET_CODE (head) != CODE_LABEL)
	continue;
6768 if (GET_CODE (NEXT_INSN (head)) == NOTE
6769 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6770 head = NEXT_INSN (head);
6772 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6773 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
{
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	  }
6783 /* Look for conditional calls that do not return, and protect predicate
6784 relations around them. Otherwise the assembler will assume the call
returns, and complain about uses of call-clobbered predicates after
     the call.  */
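/* Sketch of the bracketing emitted below (directive text is defined in
   ia64.md; the operand lists shown here are illustrative):

       .pred.safe_across_calls p1-p63
       (p6) br.call ...                        // conditional noreturn call
       .pred.safe_across_calls p1-p5,p16-p63

   which stops the assembler from warning about predicates the callee
   appears to clobber.  */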
6787 FOR_EACH_BB_REVERSE (bb)
{
      rtx insn = bb->head;
6793 if (GET_CODE (insn) == CALL_INSN
6794 && GET_CODE (PATTERN (insn)) == COND_EXEC
6795 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
{
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (bb->head == insn)
		bb->head = b;
	      if (bb->end == insn)
		bb->end = a;
	    }
if (insn == bb->end)
	    break;
6807 insn = NEXT_INSN (insn);
/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M: return gen_nop_m ();
    case TYPE_I: return gen_nop_i ();
    case TYPE_B: return gen_nop_b ();
    case TYPE_F: return gen_nop_f ();
    case TYPE_X: return gen_nop_x ();
    default: abort ();
    }
}
6836 /* After the last scheduling pass, fill in NOPs. It's easier to do this
6837 here than while scheduling. */
6843 const struct bundle *b = 0;
6846 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6850 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;
6853 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
6854 || GET_CODE (insn) == CODE_LABEL)
while (bundle_pos < 3)
	    emit_insn_before (gen_nop_type (b->t[bundle_pos++]), insn);
6862 if (GET_CODE (insn) != CODE_LABEL)
6863 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6869 else if (GET_CODE (pat) == UNSPEC_VOLATILE
6870 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
6872 int t = INTVAL (XVECEXP (pat, 0, 0));
while (bundle_pos < t)
	    emit_insn_before (gen_nop_type (b->t[bundle_pos++]), insn);
if (bundle_pos == 3)
	    b = 0;
6885 if (b && INSN_P (insn))
6887 t = ia64_safe_type (insn);
if (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
	    {
	      while (bundle_pos < 3)
		emit_insn_before (gen_nop_type (b->t[bundle_pos++]), insn);
	      continue;
	    }

	  if (t == TYPE_UNKNOWN)
	    continue;
	  while (bundle_pos < 3)
	    {
	      if (t == b->t[bundle_pos]
		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
				      || b->t[bundle_pos] == TYPE_I)))
		break;
	      emit_insn_before (gen_nop_type (b->t[bundle_pos++]), insn);
	    }
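/* For example (illustrative): an MFI bundle that was left holding only
   its M-slot insn is completed as

       { .mfi
	 ld8 r14 = [r15]
	 nop.f 0
	 nop.i 0
       }

   with gen_nop_type supplying the nop.f and nop.i.  */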
6917 /* Perform machine dependent operations on the rtl chain INSNS. */
6923 /* We are freeing block_for_insn in the toplev to keep compatibility
6924 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6925 compute_bb_for_insn ();
/* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);
6931 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6932 non-optimizing bootstrap. */
6933 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
6935 if (ia64_flag_schedule_insns2)
{
      timevar_push (TV_SCHED2);
6938 ia64_final_schedule = 1;
6939 schedule_ebbs (rtl_dump_file);
6940 ia64_final_schedule = 0;
6941 timevar_pop (TV_SCHED2);
6943 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6944 place as they were during scheduling. */
6945 emit_insn_group_barriers (rtl_dump_file, insns);
}
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);
/* A call must not be the last instruction in a function; otherwise the
     return address would fall outside the function and unwinding would
     not work properly.  Note that IA-64 differs from dwarf2 on this
     point.  */
6954 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6959 insn = get_last_insn ();
6960 if (! INSN_P (insn))
6961 insn = prev_active_insn (insn);
6962 if (GET_CODE (insn) == INSN
6963 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6964 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6967 insn = prev_active_insn (insn);
6969 if (GET_CODE (insn) == CALL_INSN)
6972 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6973 emit_insn (gen_break_f ());
6974 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
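/* The sequence just emitted corresponds roughly to

	       ;;
	       break.f 0
	       ;;

	   at the very end of the function, so a call in the final
	   bundle has a return address that still points inside the
	   function.  */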
6979 emit_predicate_relation_info ();
6982 /* Return true if REGNO is used by the epilogue. */
6985 ia64_epilogue_uses (regno)
6991 /* When a function makes a call through a function descriptor, we
6992 will write a (potentially) new value to "gp". After returning
6993 from such a call, we need to make sure the function restores the
original gp-value, even if the function itself does not use the
	 gp anymore.  */
6996 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
6998 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
6999 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7000 /* For functions defined with the syscall_linkage attribute, all
7001 input registers are marked as live at all function exits. This
7002 prevents the register allocator from using the input registers,
7003 which in turn makes it possible to restart a system call after
7004 an interrupt without having to save/restore the input registers.
7005 This also prevents kernel data from leaking to application code. */
7006 return lookup_attribute ("syscall_linkage",
7007 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
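/* Illustration (hypothetical declaration, not from this port): a
       function using the attribute might be declared as

	   extern long sys_getpid (void)
	     __attribute__ ((syscall_linkage));

       so that in0-in7 stay live at each of its exits.  */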
case BR_REG (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
7023 /* Return true if REGNO is used by the frame unwinder. */
7026 ia64_eh_uses (regno)
if (! reload_completed)
    return 0;
if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
7051 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7053 We add @ to the name if this goes in small data/bss. We can only put
7054 a variable in small data/bss if it is defined in this module or a module
7055 that we are statically linked with. We can't check the second condition,
7056 but TREE_STATIC gives us the first one. */
7058 /* ??? If we had IPA, we could check the second condition. We could support
programmer-added section attributes if the variable is not defined in
   this module.  */
7062 /* ??? See the v850 port for a cleaner way to do this. */
/* ??? We could also support long data of our own here: generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
   should make the code faster because there is one less load.  This also
   covers incomplete types, which can't go in sdata/sbss.  */
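/* Worked example (sizes illustrative): a "static int x;" whose size is
   below ia64_section_threshold lands in .sbss and is reached
   gp-relative in one short sequence,

       addl r14 = @gprel(x), r1    // r1 is gp
       ld4  r15 = [r14]

   whereas data above the threshold first needs a movl of the full
   64-bit address.  */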
7070 ia64_in_small_data_p (exp)
if (TARGET_NO_SDATA)
    return false;
7076 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7078 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7079 if (strcmp (section, ".sdata") == 0
|| strcmp (section, ".sbss") == 0)
	return true;
7085 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7087 /* If this is an incomplete type with size 0, then we can't put it
7088 in sdata because it might be too big when completed. */
if (size > 0 && size <= ia64_section_threshold)
    return true;
7097 ia64_encode_section_info (decl, first)
7099 int first ATTRIBUTE_UNUSED;
7101 const char *symbol_str;
7106 if (TREE_CODE (decl) == FUNCTION_DECL)
7108 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7112 /* Careful not to prod global register variables. */
7113 if (TREE_CODE (decl) != VAR_DECL
7114 || GET_CODE (DECL_RTL (decl)) != MEM
|| GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;
7118 symbol = XEXP (DECL_RTL (decl), 0);
7119 symbol_str = XSTR (symbol, 0);
7121 is_local = (*targetm.binds_local_p) (decl);
7123 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7125 enum tls_model kind;
7129 kind = TLS_MODEL_LOCAL_EXEC;
7131 kind = TLS_MODEL_INITIAL_EXEC;
7134 kind = TLS_MODEL_LOCAL_DYNAMIC;
7136 kind = TLS_MODEL_GLOBAL_DYNAMIC;
7137 if (kind < flag_tls_default)
7138 kind = flag_tls_default;
7140 encoding = " GLil"[kind];
7142 /* Determine if DECL will wind up in .sdata/.sbss. */
7143 else if (is_local && ia64_in_small_data_p (decl))
7146 /* Finally, encode this into the symbol string. */
7152 if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
if (encoding == symbol_str[1])
	    return;
	  /* ??? Sdata became thread, or thread became not thread.  Lose.  */
	  abort ();
7160 len = strlen (symbol_str);
7161 newstr = alloca (len + 3);
7162 newstr[0] = ENCODE_SECTION_INFO_CHAR;
7163 newstr[1] = encoding;
7164 memcpy (newstr + 2, symbol_str, len + 1);
7166 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7169 /* This decl is marked as being in small data/bss but it shouldn't be;
7170 one likely explanation for this is that the decl has been moved into
7171 a different section from the one it was in when encode_section_info
7172 was first called. Remove the encoding. */
7173 else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7174 XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7178 ia64_strip_name_encoding (str)
if (str[0] == ENCODE_SECTION_INFO_CHAR)
    str += 2;
  return str;
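/* Example of the scheme (encoding letter illustrative): a local
   small-data variable "x" is renamed internally to
   ENCODE_SECTION_INFO_CHAR, then the encoding letter chosen above
   ('s' for small data), then "x"; ia64_strip_name_encoding recovers
   "x" by skipping those two prefix characters.  */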
7188 /* Output assembly directives for prologue regions. */
/* True if we are in the last basic block of the function.  */
7192 static bool last_block;
7194 /* True if we need a copy_state command at the start of the next block. */
7196 static bool need_copy_state;
7198 /* The function emits unwind directives for the start of an epilogue. */
7203 /* If this isn't the last block of the function, then we need to label the
7204 current state, and copy it back in at the start of the next block. */
7208 fprintf (asm_out_file, "\t.label_state 1\n");
7209 need_copy_state = true;
7212 fprintf (asm_out_file, "\t.restore sp\n");
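/* The pairing produced here and in process_for_unwind_directive looks
   like (illustrative):

       .label_state 1
       .restore sp
       ...
       .body
       .copy_state 1

   i.e. the pre-epilogue unwind state is saved under label 1 and
   re-established at the start of the next basic block.  */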
/* This function processes a SET pattern, looking for the specific forms
   that require emitting an assembly directive for unwinding.  */
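/* Examples of the mapping performed below (illustrative):

       sp = sp + (-N)                ->  .fframe N
       sp = hard frame pointer       ->  epilogue; .restore sp
       save-reg = b0                 ->  .save rp, rNN
       [sp + off] = save-reg         ->  .savesp <reg>, off
       [hard-fp + off] = save-reg    ->  .savepsp <reg>, -off  */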
7219 process_set (asm_out_file, pat)
7223 rtx src = SET_SRC (pat);
7224 rtx dest = SET_DEST (pat);
7225 int src_regno, dest_regno;
7227 /* Look for the ALLOC insn. */
7228 if (GET_CODE (src) == UNSPEC_VOLATILE
7229 && XINT (src, 1) == UNSPECV_ALLOC
7230 && GET_CODE (dest) == REG)
7232 dest_regno = REGNO (dest);
7234 /* If this isn't the final destination for ar.pfs, the alloc
7235 shouldn't have been marked frame related. */
if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();
7239 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7240 ia64_dbx_register_number (dest_regno));
7244 /* Look for SP = .... */
7245 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7247 if (GET_CODE (src) == PLUS)
7249 rtx op0 = XEXP (src, 0);
7250 rtx op1 = XEXP (src, 1);
7251 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7253 if (INTVAL (op1) < 0)
7255 fputs ("\t.fframe ", asm_out_file);
fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			 -INTVAL (op1));
7258 fputc ('\n', asm_out_file);
7261 process_epilogue ();
7266 else if (GET_CODE (src) == REG
7267 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7268 process_epilogue ();
7275 /* Register move we need to look at. */
7276 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7278 src_regno = REGNO (src);
7279 dest_regno = REGNO (dest);
7284 /* Saving return address pointer. */
if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
7287 fprintf (asm_out_file, "\t.save rp, r%d\n",
7288 ia64_dbx_register_number (dest_regno));
if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
7294 fprintf (asm_out_file, "\t.save pr, r%d\n",
7295 ia64_dbx_register_number (dest_regno));
7298 case AR_UNAT_REGNUM:
if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
7301 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7302 ia64_dbx_register_number (dest_regno));
if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
7308 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7309 ia64_dbx_register_number (dest_regno));
7312 case STACK_POINTER_REGNUM:
if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
7316 fprintf (asm_out_file, "\t.vframe r%d\n",
7317 ia64_dbx_register_number (dest_regno));
7321 /* Everything else should indicate being stored to memory. */
7326 /* Memory store we need to look at. */
7327 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7333 if (GET_CODE (XEXP (dest, 0)) == REG)
{
	  base = XEXP (dest, 0);
	  off = 0;
	}
7338 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7339 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7341 base = XEXP (XEXP (dest, 0), 0);
7342 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7347 if (base == hard_frame_pointer_rtx)
7349 saveop = ".savepsp";
else if (base == stack_pointer_rtx)
	saveop = ".savesp";
7357 src_regno = REGNO (src);
7361 if (current_frame_info.reg_save_b0 != 0)
7363 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7367 if (current_frame_info.reg_save_pr != 0)
7369 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7373 if (current_frame_info.reg_save_ar_lc != 0)
7375 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7379 if (current_frame_info.reg_save_ar_pfs != 0)
7381 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7384 case AR_UNAT_REGNUM:
7385 if (current_frame_info.reg_save_ar_unat != 0)
7387 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7394 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7395 1 << (src_regno - GR_REG (4)));
7403 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7404 1 << (src_regno - BR_REG (1)));
7411 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7412 1 << (src_regno - FR_REG (2)));
7415 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7416 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7417 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7418 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7419 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7420 1 << (src_regno - FR_REG (12)));
7432 /* This function looks at a single insn and emits any directives
7433 required to unwind this insn. */
7435 process_for_unwind_directive (asm_out_file, insn)
7439 if (flag_unwind_tables
7440 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7444 if (GET_CODE (insn) == NOTE
7445 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7447 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7449 /* Restore unwind state from immediately before the epilogue. */
7450 if (need_copy_state)
7452 fprintf (asm_out_file, "\t.body\n");
7453 fprintf (asm_out_file, "\t.copy_state 1\n");
7454 need_copy_state = false;
if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
    return;
7461 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);
7467 switch (GET_CODE (pat))
case SET:
      process_set (asm_out_file, pat);
      break;

    case PARALLEL:
      {
	int par_index;
7476 int limit = XVECLEN (pat, 0);
7477 for (par_index = 0; par_index < limit; par_index++)
7479 rtx x = XVECEXP (pat, 0, par_index);
7480 if (GET_CODE (x) == SET)
7481 process_set (asm_out_file, x);
7494 ia64_init_builtins ()
7496 tree psi_type_node = build_pointer_type (integer_type_node);
7497 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7499 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7500 tree si_ftype_psi_si_si
7501 = build_function_type_list (integer_type_node,
7502 psi_type_node, integer_type_node,
7503 integer_type_node, NULL_TREE);
7505 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7506 tree di_ftype_pdi_di_di
7507 = build_function_type_list (long_integer_type_node,
7508 pdi_type_node, long_integer_type_node,
7509 long_integer_type_node, NULL_TREE);
7510 /* __sync_synchronize */
7511 tree void_ftype_void
7512 = build_function_type (void_type_node, void_list_node);
7514 /* __sync_lock_test_and_set_si */
7515 tree si_ftype_psi_si
7516 = build_function_type_list (integer_type_node,
7517 psi_type_node, integer_type_node, NULL_TREE);
7519 /* __sync_lock_test_and_set_di */
7520 tree di_ftype_pdi_di
7521 = build_function_type_list (long_integer_type_node,
7522 pdi_type_node, long_integer_type_node,
7525 /* __sync_lock_release_si */
7527 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7529 /* __sync_lock_release_di */
7531 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7533 #define def_builtin(name, type, code) \
7534 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7536 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7537 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7538 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7539 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7540 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7541 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7542 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7543 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7545 def_builtin ("__sync_synchronize", void_ftype_void,
7546 IA64_BUILTIN_SYNCHRONIZE);
7548 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7549 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7550 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7551 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7552 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7553 IA64_BUILTIN_LOCK_RELEASE_SI);
7554 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7555 IA64_BUILTIN_LOCK_RELEASE_DI);
7557 def_builtin ("__builtin_ia64_bsp",
7558 build_function_type (ptr_type_node, void_list_node),
7561 def_builtin ("__builtin_ia64_flushrs",
7562 build_function_type (void_type_node, void_list_node),
7563 IA64_BUILTIN_FLUSHRS);
7565 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7566 IA64_BUILTIN_FETCH_AND_ADD_SI);
7567 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7568 IA64_BUILTIN_FETCH_AND_SUB_SI);
7569 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7570 IA64_BUILTIN_FETCH_AND_OR_SI);
7571 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7572 IA64_BUILTIN_FETCH_AND_AND_SI);
7573 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7574 IA64_BUILTIN_FETCH_AND_XOR_SI);
7575 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7576 IA64_BUILTIN_FETCH_AND_NAND_SI);
7578 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7579 IA64_BUILTIN_ADD_AND_FETCH_SI);
7580 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7581 IA64_BUILTIN_SUB_AND_FETCH_SI);
7582 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7583 IA64_BUILTIN_OR_AND_FETCH_SI);
7584 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7585 IA64_BUILTIN_AND_AND_FETCH_SI);
7586 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7587 IA64_BUILTIN_XOR_AND_FETCH_SI);
7588 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7589 IA64_BUILTIN_NAND_AND_FETCH_SI);
7591 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7592 IA64_BUILTIN_FETCH_AND_ADD_DI);
7593 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7594 IA64_BUILTIN_FETCH_AND_SUB_DI);
7595 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7596 IA64_BUILTIN_FETCH_AND_OR_DI);
7597 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7598 IA64_BUILTIN_FETCH_AND_AND_DI);
7599 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7600 IA64_BUILTIN_FETCH_AND_XOR_DI);
7601 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7602 IA64_BUILTIN_FETCH_AND_NAND_DI);
7604 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7605 IA64_BUILTIN_ADD_AND_FETCH_DI);
7606 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7607 IA64_BUILTIN_SUB_AND_FETCH_DI);
7608 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7609 IA64_BUILTIN_OR_AND_FETCH_DI);
7610 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7611 IA64_BUILTIN_AND_AND_FETCH_DI);
7612 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7613 IA64_BUILTIN_XOR_AND_FETCH_DI);
7614 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7615 IA64_BUILTIN_NAND_AND_FETCH_DI);
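/* Usage sketch for two of the builtins declared above (compiled out;
   the wrapper name is illustrative).  */
#if 0
static void *
example_bsp_after_barrier (void)
{
  __sync_synchronize ();	/* expands to an mf barrier */
  return __builtin_ia64_bsp ();	/* current RSE backing store pointer */
}
#endif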
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/
7633 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7635 enum machine_mode mode;
7639 rtx ret, label, tmp, ccv, insn, mem, value;
7642 arg0 = TREE_VALUE (arglist);
7643 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7644 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7645 #ifdef POINTERS_EXTEND_UNSIGNED
7646 if (GET_MODE(mem) != Pmode)
7647 mem = convert_memory_address (Pmode, mem);
7649 value = expand_expr (arg1, NULL_RTX, mode, 0);
7651 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7652 MEM_VOLATILE_P (mem) = 1;
if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);
7659 emit_insn (gen_mf ());
7661 /* Special case for fetchadd instructions. */
7662 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
7672 tmp = gen_reg_rtx (mode);
7673 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7674 emit_move_insn (tmp, mem);
label = gen_label_rtx ();
  emit_label (label);
7678 emit_move_insn (ret, tmp);
7679 emit_move_insn (ccv, tmp);
7681 /* Perform the specific operation. Special case NAND by noticing
7682 one_cmpl_optab instead. */
7683 if (binoptab == one_cmpl_optab)
7685 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7686 binoptab = and_optab;
7688 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);
emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
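/* Usage sketch (compiled out; names illustrative): the expansion above
   implements, e.g., __sync_fetch_and_add_si, which returns the value
   the location held before the atomic addition.  */
#if 0
static int counter;

static int
example_fetch_then_add (void)
{
  return __sync_fetch_and_add_si (&counter, 1);	/* old value */
}
#endif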
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/
7714 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7716 enum machine_mode mode;
7720 rtx old, label, tmp, ret, ccv, insn, mem, value;
7723 arg0 = TREE_VALUE (arglist);
7724 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7725 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7726 #ifdef POINTERS_EXTEND_UNSIGNED
7727 if (GET_MODE(mem) != Pmode)
7728 mem = convert_memory_address (Pmode, mem);
7731 value = expand_expr (arg1, NULL_RTX, mode, 0);
7733 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7734 MEM_VOLATILE_P (mem) = 1;
if (target && ! register_operand (target, mode))
    target = NULL_RTX;
7739 emit_insn (gen_mf ());
7740 tmp = gen_reg_rtx (mode);
7741 old = gen_reg_rtx (mode);
7742 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7744 emit_move_insn (tmp, mem);
label = gen_label_rtx ();
  emit_label (label);
7748 emit_move_insn (old, tmp);
7749 emit_move_insn (ccv, tmp);
7751 /* Perform the specific operation. Special case NAND by noticing
7752 one_cmpl_optab instead. */
7753 if (binoptab == one_cmpl_optab)
7755 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7756 binoptab = and_optab;
7758 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);
emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
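/* Usage sketch (compiled out; names illustrative): unlike the
   fetch_and_op family, __sync_add_and_fetch_si returns the value
   *after* the operation.  */
#if 0
static int counter;

static int
example_add_then_fetch (void)
{
  return __sync_add_and_fetch_si (&counter, 1);	/* new value */
}
#endif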
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv

   For bool_ it's the same except return ret == oldval.  */
7782 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7783 enum machine_mode mode;
7788 tree arg0, arg1, arg2;
7789 rtx mem, old, new, ccv, tmp, insn;
7791 arg0 = TREE_VALUE (arglist);
7792 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7793 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7794 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7795 old = expand_expr (arg1, NULL_RTX, mode, 0);
7796 new = expand_expr (arg2, NULL_RTX, mode, 0);
7798 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7799 MEM_VOLATILE_P (mem) = 1;
7801 if (! register_operand (old, mode))
7802 old = copy_to_mode_reg (mode, old);
7803 if (! register_operand (new, mode))
7804 new = copy_to_mode_reg (mode, new);
if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);
7811 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7812 emit_move_insn (ccv, old);
7813 emit_insn (gen_mf ());
if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);
if (boolp)
    {
      target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
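/* Usage sketch (compiled out; names illustrative): the bool_ variant
   reports whether the swap happened, while the val_ variant returns
   the value found in memory.  */
#if 0
static int word;

static int
example_cas (int expected, int replacement)
{
  return __sync_bool_compare_and_swap_si (&word, expected, replacement);
}
#endif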
7830 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7833 ia64_expand_lock_test_and_set (mode, arglist, target)
7834 enum machine_mode mode;
7839 rtx mem, new, ret, insn;
7841 arg0 = TREE_VALUE (arglist);
7842 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7843 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7844 new = expand_expr (arg1, NULL_RTX, mode, 0);
7846 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7847 MEM_VOLATILE_P (mem) = 1;
7848 if (! register_operand (new, mode))
7849 new = copy_to_mode_reg (mode, new);
if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);
if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
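/* Usage sketch (compiled out; names illustrative): a minimal spin-lock
   acquire built on the xchg4.acq semantics expanded above.  */
#if 0
static int lock_word;

static void
example_acquire (void)
{
  while (__sync_lock_test_and_set_si (&lock_word, 1) != 0)
    continue;	/* spin until we swap a 0 out of the lock word */
}
#endif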
7865 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7868 ia64_expand_lock_release (mode, arglist, target)
7869 enum machine_mode mode;
7871 rtx target ATTRIBUTE_UNUSED;
7876 arg0 = TREE_VALUE (arglist);
7877 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7879 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7880 MEM_VOLATILE_P (mem) = 1;
emit_move_insn (mem, const0_rtx);

  return const0_rtx;
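/* Usage sketch (compiled out; names illustrative): the matching
   release is the plain st4.rel of zero expanded above.  */
#if 0
static int lock_word;

static void
example_release (void)
{
  __sync_lock_release_si (&lock_word);
}
#endif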
7888 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
7891 rtx subtarget ATTRIBUTE_UNUSED;
7892 enum machine_mode mode ATTRIBUTE_UNUSED;
7893 int ignore ATTRIBUTE_UNUSED;
7895 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7896 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7897 tree arglist = TREE_OPERAND (exp, 1);
7901 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7902 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7903 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7904 case IA64_BUILTIN_LOCK_RELEASE_SI:
7905 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7906 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7907 case IA64_BUILTIN_FETCH_AND_OR_SI:
7908 case IA64_BUILTIN_FETCH_AND_AND_SI:
7909 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7910 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7911 case IA64_BUILTIN_ADD_AND_FETCH_SI:
7912 case IA64_BUILTIN_SUB_AND_FETCH_SI:
7913 case IA64_BUILTIN_OR_AND_FETCH_SI:
7914 case IA64_BUILTIN_AND_AND_FETCH_SI:
7915 case IA64_BUILTIN_XOR_AND_FETCH_SI:
case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;
7920 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7921 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7922 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7923 case IA64_BUILTIN_LOCK_RELEASE_DI:
7924 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7925 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7926 case IA64_BUILTIN_FETCH_AND_OR_DI:
7927 case IA64_BUILTIN_FETCH_AND_AND_DI:
7928 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7929 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7930 case IA64_BUILTIN_ADD_AND_FETCH_DI:
7931 case IA64_BUILTIN_SUB_AND_FETCH_DI:
7932 case IA64_BUILTIN_OR_AND_FETCH_DI:
7933 case IA64_BUILTIN_AND_AND_FETCH_DI:
7934 case IA64_BUILTIN_XOR_AND_FETCH_DI:
case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
7945 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
7946 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
7947 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
7949 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
7950 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
7951 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
7953 case IA64_BUILTIN_SYNCHRONIZE:
emit_insn (gen_mf ());
      return const0_rtx;
7957 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
7958 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
7959 return ia64_expand_lock_test_and_set (mode, arglist, target);
7961 case IA64_BUILTIN_LOCK_RELEASE_SI:
7962 case IA64_BUILTIN_LOCK_RELEASE_DI:
7963 return ia64_expand_lock_release (mode, arglist, target);
7965 case IA64_BUILTIN_BSP:
7966 if (! target || ! register_operand (target, DImode))
7967 target = gen_reg_rtx (DImode);
emit_insn (gen_bsp_value (target));
      return target;
7971 case IA64_BUILTIN_FLUSHRS:
emit_insn (gen_flushrs ());
      return const0_rtx;
7975 case IA64_BUILTIN_FETCH_AND_ADD_SI:
7976 case IA64_BUILTIN_FETCH_AND_ADD_DI:
7977 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
7979 case IA64_BUILTIN_FETCH_AND_SUB_SI:
7980 case IA64_BUILTIN_FETCH_AND_SUB_DI:
7981 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
7983 case IA64_BUILTIN_FETCH_AND_OR_SI:
7984 case IA64_BUILTIN_FETCH_AND_OR_DI:
7985 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
7987 case IA64_BUILTIN_FETCH_AND_AND_SI:
7988 case IA64_BUILTIN_FETCH_AND_AND_DI:
7989 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
7991 case IA64_BUILTIN_FETCH_AND_XOR_SI:
7992 case IA64_BUILTIN_FETCH_AND_XOR_DI:
7993 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
7995 case IA64_BUILTIN_FETCH_AND_NAND_SI:
7996 case IA64_BUILTIN_FETCH_AND_NAND_DI:
7997 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
7999 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8000 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8001 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8003 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8004 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8005 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8007 case IA64_BUILTIN_OR_AND_FETCH_SI:
8008 case IA64_BUILTIN_OR_AND_FETCH_DI:
8009 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8011 case IA64_BUILTIN_AND_AND_FETCH_SI:
8012 case IA64_BUILTIN_AND_AND_FETCH_DI:
8013 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8015 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8016 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8017 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8019 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8020 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8021 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
/* On HP-UX IA64, aggregate parameters are stored in the most
   significant bits of the stack slot.  */
8034 ia64_hpux_function_arg_padding (mode, type)
8035 enum machine_mode mode;
8038 /* Exception to normal case for structures/unions/etc. */
8040 if (type && AGGREGATE_TYPE_P (type)
&& int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;
8044 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8045 hardwired to be true. */
8047 return((mode == BLKmode
8048 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8049 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8050 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8051 ? downward : upward);
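/* Worked example (illustrative): a 3-byte struct takes the aggregate
   exception above, while an SImode scalar goes through the standard
   formula, where GET_MODE_BITSIZE (32) < PARM_BOUNDARY (64 on this
   target) yields `downward', i.e. the value sits at the most
   significant end of its slot.  */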
8054 /* Switch to the section to which we should output X. The only thing
8055 special we do here is to honor small data. */
8058 ia64_select_rtx_section (mode, x, align)
8059 enum machine_mode mode;
8061 unsigned HOST_WIDE_INT align;
8063 if (GET_MODE_SIZE (mode) > 0
8064 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8067 default_elf_select_rtx_section (mode, x, align);
8070 /* It is illegal to have relocations in shared segments on AIX.
8071 Pretend flag_pic is always set. */
8074 ia64_aix_select_section (exp, reloc, align)
8077 unsigned HOST_WIDE_INT align;
8079 int save_pic = flag_pic;
8081 default_elf_select_section (exp, reloc, align);
8082 flag_pic = save_pic;
8086 ia64_aix_unique_section (decl, reloc)
8090 int save_pic = flag_pic;
8092 default_unique_section (decl, reloc);
8093 flag_pic = save_pic;
8097 ia64_aix_select_rtx_section (mode, x, align)
8098 enum machine_mode mode;
8100 unsigned HOST_WIDE_INT align;
8102 int save_pic = flag_pic;
8104 ia64_select_rtx_section (mode, x, align);
8105 flag_pic = save_pic;
8108 #include "gt-ia64.h"