/* Output routines for GCC for Hitachi / SuperH SH.
Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
Free Software Foundation, Inc.
Contributed by Steve Chamberlain (sac@cygnus.com).
Improved by Jim Wilson (wilson@cygnus.com).
This file is part of GNU CC.
GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
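/* Added example (not in the original sources): for a DImode value held in
   the register pair (r4,r5), a big-endian target has MSW == 0 / LSW == 1,
   so REGNO + MSW names r4 (the most significant word) and REGNO + LSW names
   r5; on a little-endian target the two offsets are swapped. */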
/* These are some macros to abstract register modes. */
#define CONST_OK_FOR_ADD(size) \
(TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
int current_function_interrupt;
/* ??? The pragma interrupt support will not work for SH3. */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
output code for the next function appropriate for an interrupt handler. */
/* This is set by the trap_exit attribute for functions. It specifies
a trap number to be used in a trapa instruction at function exit
(instead of an rte instruction). */
/* This is used by the sp_switch attribute for functions. It specifies
a variable holding the address of the stack the interrupt function
should switch to/from at entry/exit. */
/* This is set by #pragma trapa, and is similar to the above, except that
the compiler doesn't emit code to preserve all registers. */
static int pragma_trapa;
/* This is set by #pragma nosave_low_regs. This is useful on the SH3,
which has a separate set of low regs for User and Supervisor modes.
This should only be used for the lowest level of interrupts. Higher levels
of interrupts must save the registers in case they themselves are
interrupted. */
int pragma_nosave_low_regs;
/* This is used for communication between SETUP_INCOMING_VARARGS and
sh_expand_prologue. */
int current_function_anonymous_args;
/* Global variables for machine-dependent things. */
/* Which CPU we are scheduling for. */
enum processor_type sh_cpu;
/* Saved operands from the last compare to use when we generate an scc
insn. */
/* Provides the class number of the smallest class containing
reg number. */
int regno_reg_class[FIRST_PSEUDO_REGISTER] =
R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
char sh_register_names[FIRST_PSEUDO_REGISTER] \
[MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
char sh_additional_register_names[ADDREGNAMES_SIZE] \
[MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
= SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
description. *: target independently reserved letter.
reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
enum reg_class reg_class_from_letter[] =
/* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
/* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
/* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
/* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
/* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
/* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
/* y */ FPUL_REGS, /* z */ R0_REGS
int assembler_dialect;
static void split_branches PARAMS ((rtx));
static int branch_dest PARAMS ((rtx));
static void force_into PARAMS ((rtx, rtx));
static void print_slot PARAMS ((rtx));
static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
static void dump_table PARAMS ((rtx));
static int hi_const PARAMS ((rtx));
static int broken_move PARAMS ((rtx));
static int mova_p PARAMS ((rtx));
static rtx find_barrier PARAMS ((int, rtx, rtx));
static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
static rtx gen_block_redirect PARAMS ((rtx, int, int));
static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
static rtx frame_insn PARAMS ((rtx));
static rtx push PARAMS ((int));
static void pop PARAMS ((int));
static void push_regs PARAMS ((HARD_REG_SET *, int));
static int calc_live_regs PARAMS ((HARD_REG_SET *));
static void mark_use PARAMS ((rtx, rtx *));
static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
static rtx mark_constant_pool_use PARAMS ((rtx));
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void sh_insert_attributes PARAMS ((tree, tree *));
static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int sh_use_dfa_interface PARAMS ((void));
static int sh_issue_rate PARAMS ((void));
static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
static bool sh_cannot_modify_jumps_p PARAMS ((void));
static bool sh_ms_bitfield_layout_p PARAMS ((tree));
static void sh_encode_section_info PARAMS ((tree, int));
static const char *sh_strip_name_encoding PARAMS ((const char *));
static void sh_init_builtins PARAMS ((void));
static void sh_media_init_builtins PARAMS ((void));
static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree));
static int flow_dependent_p PARAMS ((rtx, rtx));
static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
static int shiftcosts PARAMS ((rtx));
static int andcosts PARAMS ((rtx));
static int addsubcosts PARAMS ((rtx));
static int multcosts PARAMS ((rtx));
static bool unspec_caller_rtx_p PARAMS ((rtx));
static bool sh_cannot_copy_insn_p PARAMS ((rtx));
static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
static int sh_address_cost PARAMS ((rtx));
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table
/* The next two are used for debug info when compiling with -gdwarf. */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
struct gcc_target targetm = TARGET_INITIALIZER;
/* Print the operand address in x to the stream. */
print_operand_address (stream, x)
switch (GET_CODE (x))
fprintf (stream, "@%s", reg_names[true_regnum (x)]);
rtx base = XEXP (x, 0);
rtx index = XEXP (x, 1);
switch (GET_CODE (index))
fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
reg_names[true_regnum (base)]);
int base_num = true_regnum (base);
int index_num = true_regnum (index);
fprintf (stream, "@(r0,%s)",
reg_names[MAX (base_num, index_num)]);
fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
x = mark_constant_pool_use (x);
output_addr_const (stream, x);
/* Print operand x (an rtx) in assembler syntax to file stream
according to modifier code.
'.' print a .s if insn needs delay slot
',' print LOCAL_LABEL_PREFIX
'@' print trap, rte or rts depending upon pragma interruptness
'#' output a nop if there is nothing to put in the delay slot
''' print likelihood suffix (/u for unlikely).
'O' print a constant without the #
'R' print the LSW of a dp value - changes if in little endian
'S' print the MSW of a dp value - changes if in little endian
'T' print the next word of a dp value - same as 'R' in big endian mode.
'M' print an `x' if `m' will print `base,index'.
'N' print 'r63' if the operand is (const_int 0).
'm' print a pair `base,offset' or `base,index', for LD and ST.
'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
'o' output an operator. */
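/* Illustration (added commentary, not from the original sources): with a
   DFmode operand in the register pair (r4,r5) on a big-endian target,
   '%S0' prints "r4" and '%R0' prints "r5"; on a little-endian target the
   words swap, giving "r5" and "r4" respectively. */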
print_operand (stream, x, code)
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
&& get_attr_length (XVECEXP (final_sequence, 0, 1)))
fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
fprintf (stream, "trapa #%d", trap_exit);
else if (sh_cfun_interrupt_handler_p ())
fprintf (stream, "rte");
fprintf (stream, "rts");
/* Output a nop if there's nothing in the delay slot. */
if (dbr_sequence_length () == 0)
fprintf (stream, "\n\tnop");
rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
fputs ("/u", stream);
x = mark_constant_pool_use (x);
output_addr_const (stream, x);
fputs (reg_names[REGNO (x) + LSW], (stream));
fputs (reg_names[REGNO (x) + MSW], (stream));
/* Next word of a double. */
switch (GET_CODE (x))
fputs (reg_names[REGNO (x) + 1], (stream));
if (GET_CODE (XEXP (x, 0)) != PRE_DEC
&& GET_CODE (XEXP (x, 0)) != POST_INC)
x = adjust_address (x, SImode, 4);
print_operand_address (stream, XEXP (x, 0));
switch (GET_CODE (x))
case PLUS: fputs ("add", stream); break;
case MINUS: fputs ("sub", stream); break;
case MULT: fputs ("mul", stream); break;
case DIV: fputs ("div", stream); break;
case EQ: fputs ("eq", stream); break;
case NE: fputs ("ne", stream); break;
case GT: case LT: fputs ("gt", stream); break;
case GE: case LE: fputs ("ge", stream); break;
case GTU: case LTU: fputs ("gtu", stream); break;
case GEU: case LEU: fputs ("geu", stream); break;
if (GET_CODE (x) == MEM
&& GET_CODE (XEXP (x, 0)) == PLUS
&& (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
|| GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
if (GET_CODE (x) != MEM)
switch (GET_CODE (x))
print_operand (stream, x, 0);
fputs (", 0", stream);
print_operand (stream, XEXP (x, 0), 0);
fputs (", ", stream);
print_operand (stream, XEXP (x, 1), 0);
if (x == CONST0_RTX (GET_MODE (x)))
fprintf ((stream), "r63");
if (GET_CODE (x) == CONST_INT)
fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
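/* Added example: for (const_int 0x12345678) this prints 22136,
   i.e. 0x5678, the low 16 bits taken as an unsigned value. */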
switch (GET_CODE (x))
/* FIXME: We need this on SHmedia32 because reload generates
some sign-extended HI or QI loads into DImode registers
but, because Pmode is SImode, the address ends up with a
subreg:SI of the DImode register. Maybe reload should be
fixed so as to apply alter_subreg to such loads? */
if (SUBREG_BYTE (x) != 0
|| GET_CODE (SUBREG_REG (x)) != REG)
if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V16SFmode)
fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V4SFmode)
fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
else if (GET_CODE (x) == REG
&& GET_MODE (x) == V2SFmode)
fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE_SIZE (GET_MODE (x)) > 4)
fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
fputs (reg_names[REGNO (x)], (stream));
output_address (XEXP (x, 0));
&& GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
&& GET_MODE (XEXP (x, 0)) == DImode
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
&& GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode
rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
if (GET_CODE (val) == ASHIFTRT)
if (GET_CODE (XEXP (val, 0)) == CONST)
output_addr_const (stream, XEXP (val, 0));
if (GET_CODE (XEXP (val, 0)) == CONST)
fputs (" >> ", stream);
output_addr_const (stream, XEXP (val, 1));
if (GET_CODE (val) == CONST)
output_addr_const (stream, val);
if (GET_CODE (val) == CONST)
fputs (" & 65535)", stream);
output_addr_const (stream, x);
/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
force_into (value, target)
value = force_operand (value, target);
if (! rtx_equal_p (value, target))
emit_insn (gen_move_insn (target, value));
/* Emit code to perform a block move. Choose the best method.
OPERANDS[0] is the destination.
OPERANDS[1] is the source.
OPERANDS[2] is the size.
OPERANDS[3] is the alignment safe to use. */
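/* Overview (added commentary, not from the original sources): for constant,
   word-aligned copies the code below loads the destination and source
   addresses into r4 and r5 and calls a libgcc helper such as
   __movstrSI12_i4 or __movstr; only the helper name and the setup of
   r4/r5/r6 vary with the size and alignment of the copy. */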
expand_block_move (operands)
int align = INTVAL (operands[3]);
int constp = (GET_CODE (operands[2]) == CONST_INT);
int bytes = (constp ? INTVAL (operands[2]) : 0);
/* If it isn't a constant number of bytes, or if it doesn't have 4 byte
alignment, or if it isn't a multiple of 4 bytes, then fail. */
if (! constp || align < 4 || (bytes % 4 != 0))
else if (bytes == 12)
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
entry_name = get_identifier ("__movstrSI12_i4");
sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_block_move_real_i4 (func_addr_rtx));
else if (! TARGET_SMALLCODE)
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
rtx r6 = gen_rtx (REG, SImode, 6);
entry_name = get_identifier (bytes & 4
: "__movstr_i4_even");
sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
sprintf (entry, "__movstrSI%d", bytes);
entry_name = get_identifier (entry);
sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_block_move_real (func_addr_rtx));
/* This is the same number of bytes as a memcpy call, but to a different,
less common function, so this will occasionally use more space. */
if (! TARGET_SMALLCODE)
int final_switch, while_loop;
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
entry_name = get_identifier ("__movstr");
sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
func_addr_rtx = copy_to_mode_reg (Pmode, sym);
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
/* r6 controls the size of the move. 16 is decremented from it
for each 64 bytes moved. Then the negative bit left over is used
as an index into a list of move instructions. e.g., a 72 byte move
would be set up with size(r6) = 14, for one iteration through the
big while loop, and a switch of -2 for the last part. */
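/* Added worked example: for bytes == 72 there are 72/4 == 18 words, so
   final_switch = 16 - (18 % 16) = 14 and while_loop = (18/16 - 1) * 16 = 0;
   r6 starts at 14, one pass of the loop leaves -2, which indexes the move
   list to copy the remaining 8 bytes. */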
final_switch = 16 - ((bytes / 4) % 16);
while_loop = ((bytes / 4) / 16 - 1) * 16;
emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
emit_insn (gen_block_lump_real (func_addr_rtx));
/* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register. */
prepare_move_operands (operands, mode)
enum machine_mode mode;
if ((mode == SImode || mode == DImode)
&& ! ((mode == Pmode || mode == ptr_mode)
&& tls_symbolic_operand (operands[1], Pmode) != 0))
if (SYMBOLIC_CONST_P (operands[1]))
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (Pmode, operands[1]);
else if (TARGET_SHMEDIA
&& GET_CODE (operands[1]) == LABEL_REF
&& target_reg_operand (operands[0], mode))
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
operands[1] = legitimize_pic_address (operands[1], mode, temp);
else if (GET_CODE (operands[1]) == CONST
&& GET_CODE (XEXP (operands[1], 0)) == PLUS
&& SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
operands[1] = expand_binop (mode, add_optab, temp,
XEXP (XEXP (operands[1], 0), 1),
no_new_pseudos ? temp
: gen_reg_rtx (Pmode),
if (! reload_in_progress && ! reload_completed)
/* Copy the source to a register if neither operand is a register. */
if (! register_operand (operands[0], mode)
&& ! sh_register_operand (operands[1], mode))
operands[1] = copy_to_mode_reg (mode, operands[1]);
/* This case can happen while generating code to move the result
of a library call to the target. Reject `st r0,@(rX,rY)' because
reload will fail to find a spill register for rX, since r0 is already
being used for the source. */
else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
&& GET_CODE (operands[0]) == MEM
&& GET_CODE (XEXP (operands[0], 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
operands[1] = copy_to_mode_reg (mode, operands[1]);
if (mode == Pmode || mode == ptr_mode)
enum tls_model tls_kind;
if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
rtx tga_op1, tga_ret, tmp, tmp2;
case TLS_MODEL_GLOBAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_insn (gen_tls_global_dynamic (tga_ret, op1));
case TLS_MODEL_LOCAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
if (register_operand (op0, Pmode))
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
case TLS_MODEL_INITIAL_EXEC:
emit_insn (gen_GOTaddr2picreg ());
tga_op1 = gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
case TLS_MODEL_LOCAL_EXEC:
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_load_gbr (tmp2));
tmp = gen_reg_rtx (Pmode);
emit_insn (gen_symTPOFF2reg (tmp, op1));
RTX_UNCHANGING_P (tmp) = 1;
if (register_operand (op0, Pmode))
op1 = gen_reg_rtx (Pmode);
emit_insn (gen_addsi3 (op1, tmp, tmp2));
/* Prepare the operands for an scc instruction; make sure that the
compare has been done. */
prepare_scc_operands (code)
rtx t_reg = gen_rtx_REG (SImode, T_REG);
enum rtx_code oldcode = code;
enum machine_mode mode;
/* First need a compare insn. */
/* It isn't possible to handle this case. */
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
mode = GET_MODE (sh_compare_op0);
if (mode == VOIDmode)
mode = GET_MODE (sh_compare_op1);
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if ((code != EQ && code != NE
&& (sh_compare_op1 != const0_rtx
|| code == GTU || code == GEU || code == LTU || code == LEU))
|| (mode == DImode && sh_compare_op1 != const0_rtx)
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
(mode == SFmode ? emit_sf_insn : emit_df_insn)
(gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode,
sh_compare_op0, sh_compare_op1)),
gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
/* Called from the md file, set up the operands of a compare instruction. */
from_compare (operands, code)
enum machine_mode mode = GET_MODE (sh_compare_op0);
if (mode == VOIDmode)
mode = GET_MODE (sh_compare_op1);
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
/* Force args into regs, since we can't use constants here. */
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if (sh_compare_op1 != const0_rtx
|| code == GTU || code == GEU
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
from_compare (operands, GT);
insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
insn = gen_rtx_SET (VOIDmode,
gen_rtx_REG (SImode, T_REG),
gen_rtx (code, SImode, sh_compare_op0,
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
insn = gen_rtx (PARALLEL, VOIDmode,
gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
(mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
/* Functions to output assembly code. */
/* Return a sequence of instructions to perform DI or DF move.
Since the SH cannot move a DI or DF in one instruction, we have
to take care when we see overlapping source and dest registers. */
output_movedouble (insn, operands, mode)
rtx insn ATTRIBUTE_UNUSED;
enum machine_mode mode;
rtx dst = operands[0];
rtx src = operands[1];
if (GET_CODE (dst) == MEM
&& GET_CODE (XEXP (dst, 0)) == PRE_DEC)
return "mov.l %T1,%0\n\tmov.l %1,%0";
if (register_operand (dst, mode)
&& register_operand (src, mode))
if (REGNO (src) == MACH_REG)
return "sts mach,%S0\n\tsts macl,%R0";
/* When mov.d r1,r2 do r2->r3 then r1->r2;
when mov.d r1,r0 do r1->r0 then r2->r1. */
if (REGNO (src) + 1 == REGNO (dst))
return "mov %T1,%T0\n\tmov %1,%0";
return "mov %1,%0\n\tmov %T1,%T0";
else if (GET_CODE (src) == CONST_INT)
if (INTVAL (src) < 0)
output_asm_insn ("mov #-1,%S0", operands);
output_asm_insn ("mov #0,%S0", operands);
else if (GET_CODE (src) == MEM)
int dreg = REGNO (dst);
rtx inside = XEXP (src, 0);
if (GET_CODE (inside) == REG)
ptrreg = REGNO (inside);
else if (GET_CODE (inside) == SUBREG)
ptrreg = subreg_regno (inside);
else if (GET_CODE (inside) == PLUS)
ptrreg = REGNO (XEXP (inside, 0));
/* ??? A r0+REG address shouldn't be possible here, because it isn't
an offsettable address. Unfortunately, offsettable addresses use
QImode to check the offset, and a QImode offsettable address
requires r0 for the other operand, which is not currently
supported, so we can't use the 'o' constraint.
Thus we must check for and handle r0+REG addresses here.
We punt for now, since this is likely very rare. */
if (GET_CODE (XEXP (inside, 1)) == REG)
else if (GET_CODE (inside) == LABEL_REF)
return "mov.l %1,%0\n\tmov.l %1+4,%T0";
else if (GET_CODE (inside) == POST_INC)
return "mov.l %1,%0\n\tmov.l %1,%T0";
/* Work out the safe way to copy. Copy into the second half first. */
return "mov.l %T1,%T0\n\tmov.l %1,%0";
return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Print an instruction which would have gone into a delay slot after
another instruction, but couldn't because the other instruction expanded
into a sequence where putting the slot insn at the end wouldn't work. */
final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
output_far_jump (insn, op)
struct { rtx lab, reg, op; } this;
rtx braf_base_lab = NULL_RTX;
int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
this.lab = gen_label_rtx ();
&& offset - get_attr_length (insn) <= 32766)
jump = "mov.w %O0,%1; braf %1";
jump = "mov.l %O0,%1; braf %1";
jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
jump = "mov.l %O0,%1; jmp @%1";
/* If we have a scratch register available, use it. */
if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
&& INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
output_asm_insn (jump, &this.lab);
if (dbr_sequence_length ())
print_slot (final_sequence);
output_asm_insn ("nop", 0);
/* Output the delay slot insn first if any. */
if (dbr_sequence_length ())
print_slot (final_sequence);
this.reg = gen_rtx_REG (SImode, 13);
/* We must keep the stack aligned to 8-byte boundaries on SH5.
Fortunately, MACL is fixed and call-clobbered, and we never
need its value across jumps, so save r13 in it instead of in
the stack. */
output_asm_insn ("lds r13, macl", 0);
output_asm_insn ("mov.l r13,@-r15", 0);
output_asm_insn (jump, &this.lab);
output_asm_insn ("sts macl, r13", 0);
output_asm_insn ("mov.l @r15+,r13", 0);
if (far && flag_pic && TARGET_SH2)
braf_base_lab = gen_label_rtx ();
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (braf_base_lab));
output_asm_insn (".align 2", 0);
(*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
if (far && flag_pic)
this.lab = braf_base_lab;
output_asm_insn (".long %O2-%O0", &this.lab);
output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
/* Local label counter, used for constants in the pool and inside
pattern branches. */
static int lf = 100;
/* Output code for ordinary branches. */
output_branch (logic, insn, operands)
switch (get_attr_length (insn))
/* This can happen if filling the delay slot has caused a forward
branch to exceed its range (we could reverse it, but only
when we know we won't overextend other branches; this should
best be handled by relaxation).
It can also happen when other condbranches hoist delay slot insn
from their destination, thus leading to code size increase.
But the branch will still be in the range -4092..+4098 bytes. */
/* The call to print_slot will clobber the operands. */
rtx op0 = operands[0];
/* If the instruction in the delay slot is annulled (true), then
there is no delay slot where we can put it now. The only safe
place for it is after the label. final will do that by default. */
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
ASSEMBLER_DIALECT ? "/" : ".", label);
print_slot (final_sequence);
asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
output_asm_insn ("bra\t%l0", &op0);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
/* When relaxing, handle this like a short branch. The linker
will fix it up if it still doesn't fit after relaxation. */
return logic ? "bt%.\t%l0" : "bf%.\t%l0";
/* These are for SH2e, in which we have to account for the
extra nop because of the hardware bug in annulled branches. */
&& INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
ASSEMBLER_DIALECT ? "/" : ".", label);
fprintf (asm_out_file, "\tnop\n");
output_asm_insn ("bra\t%l0", operands);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
/* When relaxing, fall through. */
sprintf (buffer, "b%s%ss\t%%l0",
ASSEMBLER_DIALECT ? "/" : ".");
output_asm_insn (buffer, &operands[0]);
/* There should be no longer branches now - that would
indicate that something has destroyed the branches set
up in machine_dependent_reorg. */
output_branchy_insn (code, template, insn, operands)
const char *template;
rtx next_insn = NEXT_INSN (insn);
if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
rtx src = SET_SRC (PATTERN (next_insn));
if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
/* Following branch not taken. */
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], next_insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (next_insn))
+ get_attr_length (next_insn));
int offset = (branch_dest (next_insn)
- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
if (offset >= -252 && offset <= 258)
if (GET_CODE (src) == IF_THEN_ELSE)
src = XEXP (src, 1);
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (insn))
+ get_attr_length (insn));
output_ieee_ccmpeq (insn, operands)
rtx insn, *operands;
return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
/* Output to FILE the start of the assembler file. */
output_file_start (file)
output_file_directive (file, main_input_filename);
/* Switch to the data section so that the coffsem symbol
isn't in the text section. */
if (TARGET_LITTLE_ENDIAN)
fprintf (file, "\t.little\n");
if (TARGET_SHCOMPACT)
fprintf (file, "\t.mode\tSHcompact\n");
else if (TARGET_SHMEDIA)
fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
TARGET_SHMEDIA64 ? 64 : 32);
/* Check if PAT includes UNSPEC_CALLER unspec pattern. */
unspec_caller_rtx_p (pat)
switch (GET_CODE (pat))
return unspec_caller_rtx_p (XEXP (pat, 0));
if (unspec_caller_rtx_p (XEXP (pat, 0)))
return unspec_caller_rtx_p (XEXP (pat, 1));
if (XINT (pat, 1) == UNSPEC_CALLER)
/* Indicate that INSN cannot be duplicated. This is true for insns
that generate a unique label. */
sh_cannot_copy_insn_p (insn)
if (!reload_completed || !flag_pic)
if (GET_CODE (insn) != INSN)
if (asm_noperands (insn) >= 0)
pat = PATTERN (insn);
if (GET_CODE (pat) != SET)
pat = SET_SRC (pat);
if (unspec_caller_rtx_p (pat))
/* Actual number of instructions used to make a shift by N. */
static const char ashiftrt_insns[] =
{ 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
/* Left shift and logical right shift are the same. */
static const char shift_insns[] =
{ 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Individual shift amounts needed to get the above length sequences.
One bit right shifts clobber the T bit, so when possible, put one bit
shifts in the middle of the sequence, so the ends are eligible for
branch delay slots. */
static const short shift_amounts[32][5] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
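/* Worked example (added commentary): a shift by 7 takes
   shift_insns[7] == 4 instructions, using shift_amounts[7] == {2, 2, 1, 2};
   the lone one-bit shift, which clobbers T, sits inside the sequence so
   that the first and last insns remain delay-slot candidates. */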
/* Likewise, but for shift amounts < 16, up to three highmost bits
might be clobbered. This is typically used when combined with some
kind of sign or zero extension. */
static const char ext_shift_insns[] =
{ 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
static const short ext_shift_amounts[32][4] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
/* Assuming we have a value that has been sign-extended by at least one bit,
can we use the ext_shift_amounts with the last shift turned to an arithmetic
shift to shift it by N without data loss, and quicker than by other means? */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
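/* Note (added): ((n) | 8) == 15 holds exactly for n == 7 and n == 15. */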
/* This is used in length attributes in sh.md to help compute the length
of arbitrary constant shift instructions. */
shift_insns_rtx (insn)
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
int shift_count = INTVAL (XEXP (set_src, 1));
enum rtx_code shift_code = GET_CODE (set_src);
return ashiftrt_insns[shift_count];
return shift_insns[shift_count];
/* Return the cost of a shift. */
if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
if (GET_MODE (x) == DImode
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == 1)
/* Everything else is invalid, because there is no pattern for it. */
/* If shift by a non-constant, then this will be expensive. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return SH_DYNAMIC_SHIFT_COST;
value = INTVAL (XEXP (x, 1));
/* Otherwise, return the true cost in instructions. */
if (GET_CODE (x) == ASHIFTRT)
int cost = ashiftrt_insns[value];
/* If SH3, then we put the constant in a reg and use shad. */
if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
cost = 1 + SH_DYNAMIC_SHIFT_COST;
return shift_insns[value];
/* Return the cost of an AND operation. */
/* ANDing with a register is a single-cycle and instruction. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
i = INTVAL (XEXP (x, 1));
if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
|| EXTRA_CONSTRAINT_S (XEXP (x, 1)))
/* These constants are single cycle extu.[bw] instructions. */
if (i == 0xff || i == 0xffff)
/* Constants that can be used in an and immediate instruction take a single
cycle, but this requires r0, so make it a little more expensive. */
if (CONST_OK_FOR_L (i))
/* Constants that can be loaded with a mov immediate and an and.
This case is probably unnecessary. */
if (CONST_OK_FOR_I (i))
/* Any other constant requires a 2 cycle pc-relative load plus an and.
This case is probably unnecessary. */
/* Return the cost of an addition or a subtraction. */
/* Adding a register is a single cycle insn. */
if (GET_CODE (XEXP (x, 1)) == REG
|| GET_CODE (XEXP (x, 1)) == SUBREG)
/* Likewise for small constants. */
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
switch (GET_CODE (XEXP (x, 1)))
return TARGET_SHMEDIA64 ? 5 : 3;
if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
/* Any other constant requires a 2 cycle pc-relative load plus an
addition. */
/* Return the cost of a multiply. */
rtx x ATTRIBUTE_UNUSED;
/* We have a mul insn, so we can never take more than the mul and the
read of the mac reg, but count more because of the latency and extra
reg usage. */
if (TARGET_SMALLCODE)
/* If we're aiming at small code, then just count the number of
insns in a multiply call sequence. */
if (TARGET_SMALLCODE)
/* Otherwise count all the insns in the routine we'd be calling too. */
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
sh_rtx_costs (x, code, outer_code, total)
int code, outer_code, *total;
if (INTVAL (x) == 0)
else if (outer_code == AND && and_operand ((x), DImode))
else if ((outer_code == IOR || outer_code == XOR
|| outer_code == PLUS)
&& CONST_OK_FOR_P (INTVAL (x)))
else if (CONST_OK_FOR_J (INTVAL (x)))
*total = COSTS_N_INSNS (outer_code != SET);
else if (CONST_OK_FOR_J (INTVAL (x) >> 16))
*total = COSTS_N_INSNS (2);
else if (CONST_OK_FOR_J ((INTVAL (x) >> 16) >> 16))
*total = COSTS_N_INSNS (3);
*total = COSTS_N_INSNS (4);
if (CONST_OK_FOR_I (INTVAL (x)))
else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
&& CONST_OK_FOR_L (INTVAL (x)))
if (TARGET_SHMEDIA64)
*total = COSTS_N_INSNS (4);
else if (TARGET_SHMEDIA32)
*total = COSTS_N_INSNS (2);
*total = COSTS_N_INSNS (4);
*total = COSTS_N_INSNS (addsubcosts (x));
*total = COSTS_N_INSNS (andcosts (x));
*total = COSTS_N_INSNS (multcosts (x));
*total = COSTS_N_INSNS (shiftcosts (x));
*total = COSTS_N_INSNS (20);
/* Compute the cost of an address. For the SH, all valid addresses are
the same cost. Use a slightly higher cost for reg + reg addressing,
since it increases pressure on r0. */
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))
&& ! TARGET_SHMEDIA ? 1 : 0);
/* Code to expand a shift. */
gen_ashift (type, n, reg)
/* Negative values here come from the shift_amounts array. */
emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
/* Same for HImode. */
gen_ashift_hi (type, n, reg)
/* Negative values here come from the shift_amounts array. */
/* We don't have HImode right shift operations because using the
ordinary 32 bit shift instructions for that doesn't generate proper
zero/sign extension.
gen_ashift_hi is only called in contexts where we know that the
sign extension works out correctly. */
if (GET_CODE (reg) == SUBREG)
offset = SUBREG_BYTE (reg);
reg = SUBREG_REG (reg);
gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
/* Output RTL to split a constant shift into its component SH constant
shift instructions. */
gen_shifty_op (code, operands)
int value = INTVAL (operands[2]);
/* Truncate the shift count in case it is out of bounds. */
value = value & 0x1f;
if (code == LSHIFTRT)
emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
emit_insn (gen_movt (operands[0]));
else if (code == ASHIFT)
/* There is a two instruction sequence for 31 bit left shifts,
but it requires r0. */
if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
else if (value == 0)
/* This can happen when not optimizing. We must output something here
to prevent the compiler from aborting in final.c after the try_split
call. */
emit_insn (gen_nop ());
max = shift_insns[value];
for (i = 0; i < max; i++)
gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Same as above, but optimized for values where the topmost bits don't
matter. */
gen_shifty_hi_op (code, operands)
int value = INTVAL (operands[2]);
void (*gen_fun) PARAMS ((int, int, rtx));
/* This operation is used by and_shl for SImode values with a few
high bits known to be cleared. */
emit_insn (gen_nop ());
gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
max = ext_shift_insns[value];
for (i = 0; i < max; i++)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* When shifting right, emit the shifts in reverse order, so that
solitary negative values come first. */
for (i = ext_shift_insns[value] - 1; i >= 0; i--)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* Output RTL for an arithmetic right shift. */
/* ??? Rewrite to use super-optimizer sequences. */
expand_ashiftrt (operands)
if (GET_CODE (operands[2]) != CONST_INT)
rtx count = copy_to_mode_reg (SImode, operands[2]);
emit_insn (gen_negsi2 (count, count));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
> 1 + SH_DYNAMIC_SHIFT_COST)
= force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
if (GET_CODE (operands[2]) != CONST_INT)
value = INTVAL (operands[2]) & 31;
emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
else if (value >= 16 && value <= 19)
wrk = gen_reg_rtx (SImode);
emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
/* Expand a short sequence inline; longer ones call a magic routine. */
else if (value <= 5)
wrk = gen_reg_rtx (SImode);
emit_move_insn (wrk, operands[1]);
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
wrk = gen_reg_rtx (Pmode);
/* Load the value into an arg reg and call a helper. */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
func_name = get_identifier (func);
sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
emit_move_insn (wrk, sym);
emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
sh_dynamicalize_shift_p (count)
return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
/* Try to find a good way to implement the combiner pattern
[(set (match_operand:SI 0 "register_operand" "r")
(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "const_int_operand" "n"))
(match_operand:SI 3 "const_int_operand" "n"))) .
LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
return 0 for simple right / left or left/right shift combination.
return 1 for a combination of shifts with zero_extend.
return 2 for a combination of shifts with an AND that needs r0.
return 3 for a combination of shifts with an AND that needs an extra
scratch register, when the three highmost bits of the AND mask are clear.
return 4 for a combination of shifts with an AND that needs an extra
scratch register, when any of the three highmost bits of the AND mask
is set.
If ATTRP is set, store an initial right shift width in ATTRP[0],
and the instruction length in ATTRP[1]. These values are not valid
When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
shift_amounts for the last shift value that is to be used before the
shl_and_kind (left_rtx, mask_rtx, attrp)
rtx left_rtx, mask_rtx;
unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
int left = INTVAL (left_rtx), right;
int cost, best_cost = 10000;
int best_right = 0, best_len = 0;
if (left < 0 || left > 31)
if (GET_CODE (mask_rtx) == CONST_INT)
mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
/* Can this be expressed as a right shift / left shift pair? */
lsb = ((mask ^ (mask - 1)) >> 1) + 1;
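/* Note (added): mask ^ (mask - 1) sets every bit up to and including the
   lowest set bit of mask, so ((mask ^ (mask - 1)) >> 1) + 1 isolates that
   lowest set bit; `right' below becomes its bit index. */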
right = exact_log2 (lsb);
mask2 = ~(mask + lsb - 1);
lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
/* mask has no zeroes but trailing zeroes <==> ! mask2 */
best_cost = shift_insns[right] + shift_insns[right + left];
/* mask has no trailing zeroes <==> ! right */
else if (! right && mask2 == ~(lsb2 - 1))
int late_right = exact_log2 (lsb2);
best_cost = shift_insns[left + late_right] + shift_insns[late_right];
/* Try to use zero extend. */
if (mask2 == ~(lsb2 - 1))
for (width = 8; width <= 16; width += 8)
/* Can we zero-extend right away? */
if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
if (cost < best_cost)
/* ??? Could try to put zero extend into initial right shift,
or even shift a bit left before the right shift. */
/* Determine value of first part of left shift, to get to the
zero extend cut-off point. */
first = width - exact_log2 (lsb2) + right;
if (first >= 0 && right + left - first >= 0)
cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ ext_shift_insns[right + left - first];
if (cost < best_cost)
/* Try to use r0 AND pattern. */
for (i = 0; i <= 2; i++)
if (! CONST_OK_FOR_L (mask >> i))
cost = (i != 0) + 2 + ext_shift_insns[left + i];
if (cost < best_cost)
best_len = cost - 1;
/* Try to use a scratch register to hold the AND operand. */
can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
for (i = 0; i <= 2; i++)
cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
if (cost < best_cost)
best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
attrp[0] = best_right;
attrp[1] = best_len;
/* This is used in length attributes of the unnamed instructions
corresponding to shl_and_kind return values of 1 and 2. */
shl_and_length (insn)
rtx set_src, left_rtx, mask_rtx;
set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
left_rtx = XEXP (XEXP (set_src, 0), 1);
mask_rtx = XEXP (set_src, 1);
shl_and_kind (left_rtx, mask_rtx, attributes);
return attributes[1];
/* This is used in the length attribute of the and_shl_scratch instruction. */
shl_and_scr_length (insn)
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
int len = shift_insns[INTVAL (XEXP (set_src, 1))];
rtx op = XEXP (set_src, 0);
len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
op = XEXP (XEXP (op, 0), 0);
return len + shift_insns[INTVAL (XEXP (op, 1))];
/* Generating rtl? */
extern int rtx_equal_function_value_matters;
/* Generate rtl for instructions for which shl_and_kind advised a particular
method of generating them, i.e. returned zero. */
gen_shl_and (dest, left_rtx, mask_rtx, source)
rtx dest, left_rtx, mask_rtx, source;
unsigned HOST_WIDE_INT mask;
int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
int right, total_shift;
void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
right = attributes[0];
total_shift = INTVAL (left_rtx) + right;
mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
int first = attributes[2];
emit_insn ((mask << right) <= 0xff
? gen_zero_extendqisi2 (dest,
gen_lowpart (QImode, source))
: gen_zero_extendhisi2 (dest,
gen_lowpart (HImode, source)));
emit_insn (gen_movsi (dest, source));
operands[2] = GEN_INT (right);
gen_shifty_hi_op (LSHIFTRT, operands);
operands[2] = GEN_INT (first);
gen_shifty_hi_op (ASHIFT, operands);
total_shift -= first;
emit_insn (mask <= 0xff
? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
: gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
if (total_shift > 0)
operands[2] = GEN_INT (total_shift);
gen_shifty_hi_op (ASHIFT, operands);
shift_gen_fun = gen_shifty_op;
/* If the topmost bit that matters is set, set the topmost bits
that don't matter. This way, we might be able to get a shorter
signed constant. */
if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
/* Don't expand fine-grained when combining, because that will
make the pattern fail. */
if (rtx_equal_function_value_matters
|| reload_in_progress || reload_completed)
/* Cases 3 and 4 should be handled by this split
only while combining. */
emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
operands[2] = GEN_INT (total_shift);
shift_gen_fun (ASHIFT, operands);
if (kind != 4 && total_shift < 16)
neg = -ext_shift_amounts[total_shift][1];
neg -= ext_shift_amounts[total_shift][2];
emit_insn (gen_and_shl_scratch (dest, source,
GEN_INT (total_shift + neg),
emit_insn (gen_movsi (dest, dest));
/* Try to find a good way to implement the combiner pattern
[(set (match_operand:SI 0 "register_operand" "=r")
(sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "const_int_operand" "n")
(match_operand:SI 3 "const_int_operand" "n")
(clobber (reg:SI T_REG))]
LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
return 0 for simple left / right shift combination.
return 1 for left shift / 8 bit sign extend / left shift.
return 2 for left shift / 16 bit sign extend / left shift.
return 3 for left shift / 8 bit sign extend / shift / sign extend.
return 4 for left shift / 16 bit sign extend / shift / sign extend.
return 5 for left shift / 16 bit sign extend / right shift.
return 6 for < 8 bit sign extend / left shift.
return 7 for < 8 bit sign extend / left shift / single right shift.
If COSTP is nonzero, assign the calculated cost to *COSTP. */
shl_sext_kind (left_rtx, size_rtx, costp)
rtx left_rtx, size_rtx;
int left, size, insize, ext;
int cost = 0, best_cost;
left = INTVAL (left_rtx);
size = INTVAL (size_rtx);
insize = size - left;
/* Default to left / right shift. */
best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
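/* Added note: the default method shifts the insize significant bits all
   the way up with an ashift by 32 - insize, then arithmetic-shifts back
   down by 32 - size, leaving the field sign-extended and shifted left by
   LEFT bits. */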
2254 /* 16 bit shift / sign extend / 16 bit shift */
2255 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2256 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2257 below, by alternative 3 or something even better. */
2258 if (cost < best_cost)
2264 /* Try a plain sign extend between two shifts. */
2265 for (ext = 16; ext >= insize; ext -= 8)
2269 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2270 if (cost < best_cost)
2272 kind = ext / (unsigned) 8;
2276 /* Check if we can do a sloppy shift with a final signed shift
2277 restoring the sign. */
2278 if (EXT_SHIFT_SIGNED (size - ext))
2279 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2280 /* If not, maybe it's still cheaper to do the second shift sloppy,
2281 and do a final sign extend? */
2282 else if (size <= 16)
2283 cost = ext_shift_insns[ext - insize] + 1
2284 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2287 if (cost < best_cost)
2289 kind = ext / (unsigned) 8 + 2;
2293 /* Check if we can sign extend in r0 */
2296 cost = 3 + shift_insns[left];
2297 if (cost < best_cost)
2302 /* Try the same with a final signed shift. */
2305 cost = 3 + ext_shift_insns[left + 1] + 1;
2306 if (cost < best_cost)
2315 /* Try to use a dynamic shift. */
2316 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2317 if (cost < best_cost)
2328 /* Function to be used in the length attribute of the instructions
2329 implementing this pattern. */
2332 shl_sext_length (insn)
2335 rtx set_src, left_rtx, size_rtx;
2338 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2339 left_rtx = XEXP (XEXP (set_src, 0), 1);
2340 size_rtx = XEXP (set_src, 1);
2341 shl_sext_kind (left_rtx, size_rtx, &cost);
2345 /* Generate rtl for this pattern */
2348 gen_shl_sext (dest, left_rtx, size_rtx, source)
2349 rtx dest, left_rtx, size_rtx, source;
2352 int left, size, insize, cost;
2355 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2356 left = INTVAL (left_rtx);
2357 size = INTVAL (size_rtx);
2358 insize = size - left;
2366 int ext = kind & 1 ? 8 : 16;
2367 int shift2 = size - ext;
2369 /* Don't expand fine-grained when combining, because that will
2370 make the pattern fail. */
2371 if (! rtx_equal_function_value_matters
2372 && ! reload_in_progress && ! reload_completed)
2374 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2375 emit_insn (gen_movsi (dest, source));
2379 emit_insn (gen_movsi (dest, source));
2383 operands[2] = GEN_INT (ext - insize);
2384 gen_shifty_hi_op (ASHIFT, operands);
2387 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2388 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2393 operands[2] = GEN_INT (shift2);
2394 gen_shifty_op (ASHIFT, operands);
2401 if (EXT_SHIFT_SIGNED (shift2))
2403 operands[2] = GEN_INT (shift2 + 1);
2404 gen_shifty_op (ASHIFT, operands);
2405 operands[2] = GEN_INT (1);
2406 gen_shifty_op (ASHIFTRT, operands);
2409 operands[2] = GEN_INT (shift2);
2410 gen_shifty_hi_op (ASHIFT, operands);
2414 operands[2] = GEN_INT (-shift2);
2415 gen_shifty_hi_op (LSHIFTRT, operands);
2417 emit_insn (size <= 8
2418 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2419 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2426 if (! rtx_equal_function_value_matters
2427 && ! reload_in_progress && ! reload_completed)
2428 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2432 operands[2] = GEN_INT (16 - insize);
2433 gen_shifty_hi_op (ASHIFT, operands);
2434 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2436 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2438 gen_ashift (ASHIFTRT, 1, dest);
2443 /* Don't expand fine-grained when combining, because that will
2444 make the pattern fail. */
2445 if (! rtx_equal_function_value_matters
2446 && ! reload_in_progress && ! reload_completed)
2448 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2449 emit_insn (gen_movsi (dest, source));
2452 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2453 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2454 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
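/* The and/xor/add triple above is the classic branch-free sign extend
   from INSIZE bits.  Worked instance (illustrative values): for
   insize == 4 and a dest of 0xc, 0xc & 0xf == 0xc, 0xc ^ 0x8 == 0x4,
   and 0x4 - 0x8 == -4, which is the 4-bit pattern 1100 read as a
   signed quantity.  */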
2456 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2457 gen_shifty_op (ASHIFT, operands);
2459 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2467 /* Prefix a symbol_ref name with "datalabel". */
2470 gen_datalabel_ref (sym)
2473 if (GET_CODE (sym) == LABEL_REF)
2474 return gen_rtx_CONST (GET_MODE (sym),
2475 gen_rtx_UNSPEC (GET_MODE (sym),
2479 if (GET_CODE (sym) != SYMBOL_REF)
2482 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2488 /* The SH cannot load a large constant into a register; constants have to
2489 come from a pc relative load. The reference of a pc relative load
2490 instruction must be less than 1k in front of the instruction.  This
2491 means that we often have to dump a constant inside a function, and
2492 generate code to branch around it.
2494 It is important to minimize this, since the branches will slow things
2495 down and make the code bigger.
2497 Worst case code looks like:
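(a representative sequence; the registers and labels are illustrative,
not taken from any particular compilation):

	mov.l L1,rn
	..
	mov.l L2,rn
	bra   L3
	nop
	align
	L1:	.long value
	L2:	.long value
	L3: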
2515 We fix this by performing a scan before scheduling, which notices which
2516 instructions need to have their operands fetched from the constant table
2517 and builds the table.
2521 scan, find an instruction which needs a pcrel move. Look forward, find the
2522 last barrier which is within MAX_COUNT bytes of the requirement.
2523 If there isn't one, make one. Process all the instructions between
2524 the find and the barrier.
2526 In the above example, we can tell that L3 is within 1k of L1, so
2527 the first move can be shrunk from the 3 insn+constant sequence into
2528 just 1 insn, and the constant moved to L3 to make:
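(again representative):

	mov.l L1,rn
	..
	mov.l L3,rn
	bra   L4
	nop
	align
	L3:	.long value
	L4:	.long value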
2539 Then the second move becomes the target for the shortening process. */
2543 rtx value; /* Value in table. */
2544 rtx label; /* Label of value. */
2545 rtx wend; /* End of window. */
2546 enum machine_mode mode; /* Mode of value. */
2548 /* True if this constant is accessed as part of a post-increment
2549 sequence. Note that HImode constants are never accessed in this way. */
2550 bool part_of_sequence_p;
2553 /* The maximum number of constants that can fit into one pool, since
2554 the pc relative range is 0...1020 bytes and constants are at least 4 bytes long.  */
2557 #define MAX_POOL_SIZE (1020/4)
2558 static pool_node pool_vector[MAX_POOL_SIZE];
2559 static int pool_size;
2560 static rtx pool_window_label;
2561 static int pool_window_last;
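/* A back-of-the-envelope check of that capacity (an illustrative
   sketch, not used by the compiler): a 0...1020 byte pc-relative reach
   divided by the 4-byte minimum entry size allows at most
   1020 / 4 == 255 constants, which is what MAX_POOL_SIZE encodes.  */
static int
max_pool_entries (void)
{
  return 1020 / 4;	/* == MAX_POOL_SIZE == 255 */
}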
2563 /* ??? If we need a constant in HImode which is the truncated value of a
2564 constant we need in SImode, we could combine the two entries thus saving
2565 two bytes.  Is this common enough to be worth the effort of implementing it?
2568 /* ??? This stuff should be done at the same time that we shorten branches.
2569 As it is now, we must assume that all branches are the maximum size, and
2570 this causes us to almost always output constant pools sooner than would be necessary.  */
2573 /* Add a constant to the pool and return its label. */
2576 add_constant (x, mode, last_value)
2578 enum machine_mode mode;
2582 rtx lab, new, ref, newref;
2584 /* First see if we've already got it. */
2585 for (i = 0; i < pool_size; i++)
2587 if (x->code == pool_vector[i].value->code
2588 && mode == pool_vector[i].mode)
2590 if (x->code == CODE_LABEL)
2592 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2595 if (rtx_equal_p (x, pool_vector[i].value))
2600 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2602 new = gen_label_rtx ();
2603 LABEL_REFS (new) = pool_vector[i].label;
2604 pool_vector[i].label = lab = new;
2606 if (lab && pool_window_label)
2608 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2609 ref = pool_vector[pool_window_last].wend;
2610 LABEL_NEXTREF (newref) = ref;
2611 pool_vector[pool_window_last].wend = newref;
2614 pool_window_label = new;
2615 pool_window_last = i;
2621 /* Need a new one. */
2622 pool_vector[pool_size].value = x;
2623 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2626 pool_vector[pool_size - 1].part_of_sequence_p = true;
2629 lab = gen_label_rtx ();
2630 pool_vector[pool_size].mode = mode;
2631 pool_vector[pool_size].label = lab;
2632 pool_vector[pool_size].wend = NULL_RTX;
2633 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2634 if (lab && pool_window_label)
2636 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2637 ref = pool_vector[pool_window_last].wend;
2638 LABEL_NEXTREF (newref) = ref;
2639 pool_vector[pool_window_last].wend = newref;
2642 pool_window_label = lab;
2643 pool_window_last = pool_size;
2648 /* Output the literal table. */
2659 /* Do two passes, first time dump out the HI sized constants. */
2661 for (i = 0; i < pool_size; i++)
2663 pool_node *p = &pool_vector[i];
2665 if (p->mode == HImode)
2669 scan = emit_insn_after (gen_align_2 (), scan);
2672 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2673 scan = emit_label_after (lab, scan);
2674 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2676 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2678 lab = XEXP (ref, 0);
2679 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2682 else if (p->mode == DFmode)
2688 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2690 rtx align_insn = NULL_RTX;
2692 scan = emit_label_after (gen_label_rtx (), scan);
2693 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2696 for (i = 0; i < pool_size; i++)
2698 pool_node *p = &pool_vector[i];
2706 if (align_insn && !p->part_of_sequence_p)
2708 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2709 emit_label_before (lab, align_insn);
2710 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2712 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2714 lab = XEXP (ref, 0);
2715 emit_insn_before (gen_consttable_window_end (lab),
2718 delete_insn (align_insn);
2719 align_insn = NULL_RTX;
2724 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2725 scan = emit_label_after (lab, scan);
2726 scan = emit_insn_after (gen_consttable_4 (p->value,
2728 need_align = ! need_align;
2734 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2739 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2740 scan = emit_label_after (lab, scan);
2741 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2749 if (p->mode != HImode)
2751 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2753 lab = XEXP (ref, 0);
2754 scan = emit_insn_after (gen_consttable_window_end (lab),
2763 for (i = 0; i < pool_size; i++)
2765 pool_node *p = &pool_vector[i];
2776 scan = emit_label_after (gen_label_rtx (), scan);
2777 scan = emit_insn_after (gen_align_4 (), scan);
2779 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2780 scan = emit_label_after (lab, scan);
2781 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2789 scan = emit_label_after (gen_label_rtx (), scan);
2790 scan = emit_insn_after (gen_align_4 (), scan);
2792 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2793 scan = emit_label_after (lab, scan);
2794 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2802 if (p->mode != HImode)
2804 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2806 lab = XEXP (ref, 0);
2807 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2812 scan = emit_insn_after (gen_consttable_end (), scan);
2813 scan = emit_barrier_after (scan);
2815 pool_window_label = NULL_RTX;
2816 pool_window_last = 0;
2819 /* Return nonzero if constant would be an ok source for a
2820 mov.w instead of a mov.l. */
2826 return (GET_CODE (src) == CONST_INT
2827 && INTVAL (src) >= -32768
2828 && INTVAL (src) <= 32767);
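/* The same window test as a stand-alone C predicate (a sketch with an
   assumed 32-bit value type): mov.w sign-extends a 16-bit literal,
   hence the [-32768, 32767] range checked above.  */
static int
fits_mov_w (long value)
{
  return value >= -32768 && value <= 32767;
}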
2831 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2833 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2834 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2835 need to fix it if the input value is CONST_OK_FOR_I. */
2841 if (GET_CODE (insn) == INSN)
2843 rtx pat = PATTERN (insn);
2844 if (GET_CODE (pat) == PARALLEL)
2845 pat = XVECEXP (pat, 0, 0);
2846 if (GET_CODE (pat) == SET
2847 /* We can load any 8 bit value if we don't care what the high
2848 order bits end up as. */
2849 && GET_MODE (SET_DEST (pat)) != QImode
2850 && (CONSTANT_P (SET_SRC (pat))
2851 /* Match mova_const. */
2852 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2853 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2854 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2856 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2857 && (fp_zero_operand (SET_SRC (pat))
2858 || fp_one_operand (SET_SRC (pat)))
2859 /* ??? If this is a -m4 or -m4-single compilation, in general
2860 we don't know the current setting of fpscr, so disable fldi.
2861 There is an exception if this was a register-register move
2862 before reload - and hence it was ascertained that we have
2863 single precision setting - and in a post-reload optimization
2864 we changed this to do a constant load. In that case
2865 we don't have an r0 clobber, hence we must use fldi. */
2866 && (! TARGET_SH4 || TARGET_FMOVD
2867 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2869 && GET_CODE (SET_DEST (pat)) == REG
2870 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2871 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2872 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2883 return (GET_CODE (insn) == INSN
2884 && GET_CODE (PATTERN (insn)) == SET
2885 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2886 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2887 /* Don't match mova_const. */
2888 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2891 /* Find the last barrier from insn FROM which is close enough to hold the
2892 constant pool.  If we can't find one, then create one near the end of the range.  */
2896 find_barrier (num_mova, mova, from)
2907 int leading_mova = num_mova;
2908 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2912 /* For HImode: range is 510, add 4 because pc counts from address of
2913 second instruction after this one, subtract 2 for the jump instruction
2914 that we may need to emit before the table, subtract 2 for the instruction
2915 that fills the jump delay slot (in very rare cases, reorg will take an
2916 instruction from after the constant pool or will leave the delay slot
2917 empty). This gives 510.
2918 For SImode: range is 1020, add 4 because pc counts from address of
2919 second instruction after this one, subtract 2 in case pc is 2 byte
2920 aligned, subtract 2 for the jump instruction that we may need to emit
2921 before the table, subtract 2 for the instruction that fills the jump
2922 delay slot. This gives 1018. */
2924 /* The branch will always be shortened now that the reference address for
2925 forward branches is the successor address, thus we need no longer make
2926 adjustments to the [sh]i_limit for -O0. */
2931 while (from && count_si < si_limit && count_hi < hi_limit)
2933 int inc = get_attr_length (from);
2936 if (GET_CODE (from) == CODE_LABEL)
2939 new_align = 1 << label_to_alignment (from);
2940 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2941 new_align = 1 << barrier_align (from);
2947 if (GET_CODE (from) == BARRIER)
2950 found_barrier = from;
2952 /* If we are at the end of the function, or in front of an alignment
2953 instruction, we need not insert an extra alignment. We prefer
2954 this kind of barrier. */
2955 if (barrier_align (from) > 2)
2956 good_barrier = from;
2959 if (broken_move (from))
2962 enum machine_mode mode;
2964 pat = PATTERN (from);
2965 if (GET_CODE (pat) == PARALLEL)
2966 pat = XVECEXP (pat, 0, 0);
2967 src = SET_SRC (pat);
2968 dst = SET_DEST (pat);
2969 mode = GET_MODE (dst);
2971 /* We must explicitly check the mode, because sometimes the
2972 front end will generate code to load unsigned constants into
2973 HImode targets without properly sign extending them. */
2975 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2978 /* We put the short constants before the long constants, so
2979 we must count the length of short constants in the range
2980 for the long constants. */
2981 /* ??? This isn't optimal, but is easy to do. */
2986 /* We dump DF/DI constants before SF/SI ones, because
2987 the limit is the same, but the alignment requirements
2988 are higher. We may waste up to 4 additional bytes
2989 for alignment, and the DF/DI constant may have
2990 another SF/SI constant placed before it. */
2991 if (TARGET_SHCOMPACT
2993 && (mode == DFmode || mode == DImode))
2998 while (si_align > 2 && found_si + si_align - 2 > count_si)
3000 if (found_si > count_si)
3001 count_si = found_si;
3002 found_si += GET_MODE_SIZE (mode);
3004 si_limit -= GET_MODE_SIZE (mode);
3007 /* See the code in machine_dependent_reorg, which has a similar if
3008 statement that generates a new mova insn in many cases. */
3009 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3019 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3021 if (found_si > count_si)
3022 count_si = found_si;
3024 else if (GET_CODE (from) == JUMP_INSN
3025 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3026 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3030 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3032 /* We have just passed the barrier in front of the
3033 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3034 the ADDR_DIFF_VEC is accessed as data, just like our pool
3035 constants, this is a good opportunity to accommodate what
3036 we have gathered so far.
3037 If we waited any longer, we could end up at a barrier in
3038 front of code, which gives worse cache usage for separated
3039 instruction / data caches. */
3040 good_barrier = found_barrier;
3045 rtx body = PATTERN (from);
3046 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3049 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3050 else if (GET_CODE (from) == JUMP_INSN
3052 && ! TARGET_SMALLCODE)
3058 if (new_align > si_align)
3060 si_limit -= (count_si - 1) & (new_align - si_align);
3061 si_align = new_align;
3063 count_si = (count_si + new_align - 1) & -new_align;
3068 if (new_align > hi_align)
3070 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3071 hi_align = new_align;
3073 count_hi = (count_hi + new_align - 1) & -new_align;
3075 from = NEXT_INSN (from);
3082 /* Try as we might, the leading mova is out of range. Change
3083 it into a load (which will become a pcload) and retry. */
3084 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3085 INSN_CODE (mova) = -1;
3086 return find_barrier (0, 0, mova);
3090 /* Insert the constant pool table before the mova instruction,
3091 to prevent the mova label reference from going out of range. */
3093 good_barrier = found_barrier = barrier_before_mova;
3099 if (good_barrier && next_real_insn (found_barrier))
3100 found_barrier = good_barrier;
3104 /* We didn't find a barrier in time to dump our stuff,
3105 so we'll make one. */
3106 rtx label = gen_label_rtx ();
3108 /* If we exceeded the range, then we must back up over the last
3109 instruction we looked at. Otherwise, we just need to undo the
3110 NEXT_INSN at the end of the loop. */
3111 if (count_hi > hi_limit || count_si > si_limit)
3112 from = PREV_INSN (PREV_INSN (from));
3114 from = PREV_INSN (from);
3116 /* Walk back to be just before any jump or label.
3117 Putting it before a label reduces the number of times the branch
3118 around the constant pool table will be hit. Putting it before
3119 a jump makes it more likely that the bra delay slot will be filled.  */
3121 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3122 || GET_CODE (from) == CODE_LABEL)
3123 from = PREV_INSN (from);
3125 from = emit_jump_insn_after (gen_jump (label), from);
3126 JUMP_LABEL (from) = label;
3127 LABEL_NUSES (label) = 1;
3128 found_barrier = emit_barrier_after (from);
3129 emit_label_after (label, found_barrier);
3132 return found_barrier;
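/* An illustrative restatement of the pcload limits computed at the top
   of find_barrier (a sketch only; REACH is 510 for mov.w and 1020 for
   mov.l): add 4 for the pc bias, subtract 2 each for the branch around
   the table and its delay slot, and for mov.l subtract 2 more in case
   the pc is only 2-byte aligned.  This reproduces 510 and 1018.  */
static int
pcload_limit (int reach)
{
  return reach + 4 - 2 - 2 - (reach == 1020 ? 2 : 0);
}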
3135 /* If the instruction INSN is implemented by a special function, and we can
3136 positively find the register that is used to call the sfunc, and this
3137 register is not used anywhere else in this instruction - except as the
3138 destination of a set, return this register; else, return 0. */
3140 sfunc_uses_reg (insn)
3144 rtx pattern, part, reg_part, reg;
3146 if (GET_CODE (insn) != INSN)
3148 pattern = PATTERN (insn);
3149 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3152 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3154 part = XVECEXP (pattern, 0, i);
3155 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3160 reg = XEXP (reg_part, 0);
3161 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3163 part = XVECEXP (pattern, 0, i);
3164 if (part == reg_part || GET_CODE (part) == CLOBBER)
3166 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3167 && GET_CODE (SET_DEST (part)) == REG)
3168 ? SET_SRC (part) : part)))
3174 /* See if the only way in which INSN uses REG is by calling it, or by
3175 setting it while calling it.  Set *SET to a SET rtx if the register is set by INSN.  */
3179 noncall_uses_reg (reg, insn, set)
3188 reg2 = sfunc_uses_reg (insn);
3189 if (reg2 && REGNO (reg2) == REGNO (reg))
3191 pattern = single_set (insn);
3193 && GET_CODE (SET_DEST (pattern)) == REG
3194 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3198 if (GET_CODE (insn) != CALL_INSN)
3200 /* We don't use rtx_equal_p because we don't care if the mode is different.  */
3202 pattern = single_set (insn);
3204 && GET_CODE (SET_DEST (pattern)) == REG
3205 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3211 par = PATTERN (insn);
3212 if (GET_CODE (par) == PARALLEL)
3213 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3215 part = XVECEXP (par, 0, i);
3216 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3219 return reg_mentioned_p (reg, SET_SRC (pattern));
3225 pattern = PATTERN (insn);
3227 if (GET_CODE (pattern) == PARALLEL)
3231 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3232 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3234 pattern = XVECEXP (pattern, 0, 0);
3237 if (GET_CODE (pattern) == SET)
3239 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3241 /* We don't use rtx_equal_p, because we don't care if the
3242 mode is different. */
3243 if (GET_CODE (SET_DEST (pattern)) != REG
3244 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3250 pattern = SET_SRC (pattern);
3253 if (GET_CODE (pattern) != CALL
3254 || GET_CODE (XEXP (pattern, 0)) != MEM
3255 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3261 /* Given X, a pattern of an insn or a part of it, return a mask of used
3262 general registers. Bits 0..15 mean that the respective registers
3263 are used as inputs in the instruction. Bits 16..31 mean that the
3264 registers 0..15, respectively, are used as outputs, or are clobbered.
3265 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3267 regs_used (x, is_dest)
3276 code = GET_CODE (x);
3281 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3282 << (REGNO (x) + is_dest));
3286 rtx y = SUBREG_REG (x);
3288 if (GET_CODE (y) != REG)
3291 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3293 subreg_regno_offset (REGNO (y),
3296 GET_MODE (x)) + is_dest));
3300 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3302 /* If there was a return value, it must have been indicated with USE. */
3317 fmt = GET_RTX_FORMAT (code);
3319 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3324 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3325 used |= regs_used (XVECEXP (x, i, j), is_dest);
3327 else if (fmt[i] == 'e')
3328 used |= regs_used (XEXP (x, i), is_dest);
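/* A small worked example of this mask encoding (hypothetical insn, for
   illustration only): for r1 = r2 + r3, the inputs r2 and r3 set bits
   2 and 3, and the destination r1 sets bit 1 + 16, giving
   (1 << 2) | (1 << 3) | (1 << 17) == 0x2000c.  */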
3333 /* Create an instruction that prevents redirection of a conditional branch
3334 to the destination of the JUMP with address ADDR.
3335 If the branch needs to be implemented as an indirect jump, try to find
3336 a scratch register for it.
3337 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3338 If any preceding insn that doesn't fit into a delay slot is good enough,
3339 pass 1. Pass 2 if a definite blocking insn is needed.
3340 -1 is used internally to avoid deep recursion.
3341 If a blocking instruction is made or recognized, return it. */
3344 gen_block_redirect (jump, addr, need_block)
3346 int addr, need_block;
3349 rtx prev = prev_nonnote_insn (jump);
3352 /* First, check if we already have an instruction that satisfies our need. */
3353 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3355 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3357 if (GET_CODE (PATTERN (prev)) == USE
3358 || GET_CODE (PATTERN (prev)) == CLOBBER
3359 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3361 else if ((need_block &= ~1) < 0)
3363 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3366 /* We can't use JUMP_LABEL here because it might be undefined
3367 when not optimizing. */
3368 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3369 /* If the branch is out of range, try to find a scratch register for it. */
3371 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3375 /* Don't look for the stack pointer as a scratch register,
3376 it would cause trouble if an interrupt occurred. */
3377 unsigned try = 0x7fff, used;
3378 int jump_left = flag_expensive_optimizations + 1;
3380 /* It is likely that the most recent eligible instruction is wanted for
3381 the delay slot. Therefore, find out which registers it uses, and
3382 try to avoid using them. */
3384 for (scan = jump; (scan = PREV_INSN (scan)); )
3388 if (INSN_DELETED_P (scan))
3390 code = GET_CODE (scan);
3391 if (code == CODE_LABEL || code == JUMP_INSN)
3394 && GET_CODE (PATTERN (scan)) != USE
3395 && GET_CODE (PATTERN (scan)) != CLOBBER
3396 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3398 try &= ~regs_used (PATTERN (scan), 0);
3402 for (used = dead = 0, scan = JUMP_LABEL (jump);
3403 (scan = NEXT_INSN (scan)); )
3407 if (INSN_DELETED_P (scan))
3409 code = GET_CODE (scan);
3410 if (GET_RTX_CLASS (code) == 'i')
3412 used |= regs_used (PATTERN (scan), 0);
3413 if (code == CALL_INSN)
3414 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3415 dead |= (used >> 16) & ~used;
3421 if (code == JUMP_INSN)
3423 if (jump_left-- && simplejump_p (scan))
3424 scan = JUMP_LABEL (scan);
3430 /* Mask out the stack pointer again, in case it was
3431 the only 'free' register we have found. */
3434 /* If the immediate destination is still in range, check for possible
3435 threading with a jump beyond the delay slot insn.
3436 Don't check if we are called recursively; the jump has been or will be
3437 checked in that other invocation.
3439 else if (optimize && need_block >= 0)
3441 rtx next = next_active_insn (next_active_insn (dest));
3442 if (next && GET_CODE (next) == JUMP_INSN
3443 && GET_CODE (PATTERN (next)) == SET
3444 && recog_memoized (next) == CODE_FOR_jump)
3446 dest = JUMP_LABEL (next);
3448 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3450 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3456 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3458 /* It would be nice if we could convert the jump into an indirect
3459 jump / far branch right now, thus exposing all constituent
3460 instructions to further optimization. However, reorg uses
3461 simplejump_p to determine if there is an unconditional jump where
3462 it should try to schedule instructions from the target of the
3463 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL.  */
3465 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3466 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3468 /* ??? We would like this to have the scope of the jump, but that
3469 scope will change when a delay slot insn of an inner scope is added.
3470 Hence, after delay slot scheduling, we'll have to expect
3471 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump.  */
3474 INSN_SCOPE (insn) = INSN_SCOPE (jump);
3475 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3478 else if (need_block)
3479 /* We can't use JUMP_LABEL here because it might be undefined
3480 when not optimizing. */
3481 return emit_insn_before (gen_block_branch_redirect
3482 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3487 #define CONDJUMP_MIN -252
3488 #define CONDJUMP_MAX 262
3491 /* A label (to be placed) in front of the jump
3492 that jumps to our ultimate destination. */
3494 /* Where we are going to insert it if we cannot move the jump any farther,
3495 or the jump itself if we have picked up an existing jump. */
3497 /* The ultimate destination. */
3499 struct far_branch *prev;
3500 /* If the branch has already been created, its address;
3501 else the address of its first prospective user. */
3505 static void gen_far_branch PARAMS ((struct far_branch *));
3506 enum mdep_reorg_phase_e mdep_reorg_phase;
3509 struct far_branch *bp;
3511 rtx insn = bp->insert_place;
3513 rtx label = gen_label_rtx ();
3515 emit_label_after (label, insn);
3518 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3519 LABEL_NUSES (bp->far_label)++;
3522 jump = emit_jump_insn_after (gen_return (), insn);
3523 /* Emit a barrier so that reorg knows that any following instructions
3524 are not reachable via a fall-through path.
3525 But don't do this when not optimizing, since we wouldn't suppress the
3526 alignment for the barrier then, and could end up with out-of-range
3527 pc-relative loads. */
3529 emit_barrier_after (jump);
3530 emit_label_after (bp->near_label, insn);
3531 JUMP_LABEL (jump) = bp->far_label;
3532 if (! invert_jump (insn, label, 1))
3535 (gen_stuff_delay_slot
3536 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3537 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3539 /* Prevent reorg from undoing our splits. */
3540 gen_block_redirect (jump, bp->address += 2, 2);
3543 /* Fix up ADDR_DIFF_VECs. */
3545 fixup_addr_diff_vecs (first)
3550 for (insn = first; insn; insn = NEXT_INSN (insn))
3552 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3554 if (GET_CODE (insn) != JUMP_INSN
3555 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3557 pat = PATTERN (insn);
3558 vec_lab = XEXP (XEXP (pat, 0), 0);
3560 /* Search for the matching casesi_jump_2.  */
3561 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3563 if (GET_CODE (prev) != JUMP_INSN)
3565 prevpat = PATTERN (prev);
3566 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3568 x = XVECEXP (prevpat, 0, 1);
3569 if (GET_CODE (x) != USE)
3572 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3576 /* Emit the reference label of the braf where it belongs, right after
3577 the casesi_jump_2 (i.e. braf). */
3578 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3579 emit_label_after (braf_label, prev);
3581 /* Fix up the ADDR_DIFF_VEC to be relative
3582 to the reference address of the braf. */
3583 XEXP (XEXP (pat, 0), 0) = braf_label;
3587 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3588 a barrier. Return the base 2 logarithm of the desired alignment. */
3590 barrier_align (barrier_or_label)
3591 rtx barrier_or_label;
3593 rtx next = next_real_insn (barrier_or_label), pat, prev;
3594 int slot, credit, jump_to_next = 0;
3599 pat = PATTERN (next);
3601 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3604 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3605 /* This is a barrier in front of a constant table. */
3608 prev = prev_real_insn (barrier_or_label);
3609 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3611 pat = PATTERN (prev);
3612 /* If this is a very small table, we want to keep the alignment after
3613 the table to the minimum for proper code alignment. */
3614 return ((TARGET_SMALLCODE
3615 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3616 <= (unsigned)1 << (CACHE_LOG - 2)))
3617 ? 1 << TARGET_SHMEDIA : align_jumps_log);
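/* Illustrative numbers for the test above: assuming CACHE_LOG == 5
   (32-byte cache lines, a hypothetical value), tables of at most
   1 << (5 - 2) == 8 bytes, i.e. four HImode or two SImode entries,
   keep only the minimal alignment.  */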
3620 if (TARGET_SMALLCODE)
3623 if (! TARGET_SH2 || ! optimize)
3624 return align_jumps_log;
3626 /* When fixing up pcloads, a constant table might be inserted just before
3627 the basic block that ends with the barrier. Thus, we can't trust the
3628 instruction lengths before that. */
3629 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3631 /* Check if there is an immediately preceding branch to the insn beyond
3632 the barrier.  We must weigh the cost of discarding useful information
3633 from the current cache line when executing this branch and there is
3634 an alignment, against that of fetching unneeded insns in front of the
3635 branch target when there is no alignment. */
3637 /* There are two delay_slot cases to consider. One is the simple case
3638 where the preceding branch is to the insn beyond the barrier (simple
3639 delay slot filling), and the other is where the preceding branch has
3640 a delay slot that is a duplicate of the insn after the barrier
3641 (fill_eager_delay_slots) and the branch is to the insn after the insn
3642 after the barrier. */
3644 /* PREV is presumed to be the JUMP_INSN for the barrier under
3645 investigation. Skip to the insn before it. */
3646 prev = prev_real_insn (prev);
3648 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3649 credit >= 0 && prev && GET_CODE (prev) == INSN;
3650 prev = prev_real_insn (prev))
3653 if (GET_CODE (PATTERN (prev)) == USE
3654 || GET_CODE (PATTERN (prev)) == CLOBBER)
3656 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3658 prev = XVECEXP (PATTERN (prev), 0, 1);
3659 if (INSN_UID (prev) == INSN_UID (next))
3661 /* Delay slot was filled with insn at jump target. */
3668 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3670 credit -= get_attr_length (prev);
3673 && GET_CODE (prev) == JUMP_INSN
3674 && JUMP_LABEL (prev))
3678 || next_real_insn (JUMP_LABEL (prev)) == next
3679 /* If relax_delay_slots() decides NEXT was redundant
3680 with some previous instruction, it will have
3681 redirected PREV's jump to the following insn. */
3682 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3683 /* There is no upper bound on redundant instructions
3684 that might have been skipped, but we must not put an
3685 alignment where none had been before. */
3686 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3688 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3689 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3691 rtx pat = PATTERN (prev);
3692 if (GET_CODE (pat) == PARALLEL)
3693 pat = XVECEXP (pat, 0, 0);
3694 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3700 return align_jumps_log;
3703 /* If we are inside a phony loop, almost any kind of label can turn up as the
3704 first one in the loop. Aligning a braf label causes incorrect switch
3705 destination addresses; we can detect braf labels because they are
3706 followed by a BARRIER.
3707 Applying loop alignment to small constant or switch tables is a waste
3708 of space, so we suppress this too. */
3710 sh_loop_align (label)
3716 next = next_nonnote_insn (next);
3717 while (next && GET_CODE (next) == CODE_LABEL);
3721 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3722 || recog_memoized (next) == CODE_FOR_consttable_2)
3725 return align_loops_log;
3728 /* Exported to toplev.c.
3730 Do a final pass over the function, just before delayed branch scheduling.  */
3734 machine_dependent_reorg (first)
3737 rtx insn, mova = NULL_RTX;
3739 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3740 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3742 /* We must split call insns before introducing `mova's. If we're
3743 optimizing, they'll have already been split. Otherwise, make
3744 sure we don't split them too late. */
3746 split_all_insns_noflow ();
3751 /* If relaxing, generate pseudo-ops to associate function calls with
3752 the symbols they call. It does no harm to not generate these
3753 pseudo-ops.  However, when we can generate them, it enables the
3754 linker to potentially relax the jsr to a bsr, and eliminate the
3755 register load and, possibly, the constant pool entry. */
3757 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3760 /* Remove all REG_LABEL notes. We want to use them for our own
3761 purposes. This works because none of the remaining passes
3762 need to look at them.
3764 ??? But it may break in the future. We should use a machine
3765 dependent REG_NOTE, or some other approach entirely. */
3766 for (insn = first; insn; insn = NEXT_INSN (insn))
3772 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3773 remove_note (insn, note);
3777 for (insn = first; insn; insn = NEXT_INSN (insn))
3779 rtx pattern, reg, link, set, scan, dies, label;
3780 int rescan = 0, foundinsn = 0;
3782 if (GET_CODE (insn) == CALL_INSN)
3784 pattern = PATTERN (insn);
3786 if (GET_CODE (pattern) == PARALLEL)
3787 pattern = XVECEXP (pattern, 0, 0);
3788 if (GET_CODE (pattern) == SET)
3789 pattern = SET_SRC (pattern);
3791 if (GET_CODE (pattern) != CALL
3792 || GET_CODE (XEXP (pattern, 0)) != MEM)
3795 reg = XEXP (XEXP (pattern, 0), 0);
3799 reg = sfunc_uses_reg (insn);
3804 if (GET_CODE (reg) != REG)
3807 /* This is a function call via REG. If the only uses of REG
3808 between the time that it is set and the time that it dies
3809 are in function calls, then we can associate all the
3810 function calls with the setting of REG. */
3812 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3814 if (REG_NOTE_KIND (link) != 0)
3816 set = single_set (XEXP (link, 0));
3817 if (set && rtx_equal_p (reg, SET_DEST (set)))
3819 link = XEXP (link, 0);
3826 /* ??? Sometimes global register allocation will have
3827 deleted the insn pointed to by LOG_LINKS. Try
3828 scanning backward to find where the register is set. */
3829 for (scan = PREV_INSN (insn);
3830 scan && GET_CODE (scan) != CODE_LABEL;
3831 scan = PREV_INSN (scan))
3833 if (! INSN_P (scan))
3836 if (! reg_mentioned_p (reg, scan))
3839 if (noncall_uses_reg (reg, scan, &set))
3853 /* The register is set at LINK. */
3855 /* We can only optimize the function call if the register is
3856 being set to a symbol. In theory, we could sometimes
3857 optimize calls to a constant location, but the assembler
3858 and linker do not support that at present. */
3859 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3860 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3863 /* Scan forward from LINK to the place where REG dies, and
3864 make sure that the only insns which use REG are
3865 themselves function calls. */
3867 /* ??? This doesn't work for call targets that were allocated
3868 by reload, since there may not be a REG_DEAD note for the register.  */
3872 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3876 /* Don't try to trace forward past a CODE_LABEL if we haven't
3877 seen INSN yet. Ordinarily, we will only find the setting insn
3878 in LOG_LINKS if it is in the same basic block. However,
3879 cross-jumping can insert code labels in between the load and
3880 the call, and can result in situations where a single call
3881 insn may have two targets depending on where we came from. */
3883 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3886 if (! INSN_P (scan))
3889 /* Don't try to trace forward past a JUMP. To optimize
3890 safely, we would have to check that all the
3891 instructions at the jump destination did not use REG. */
3893 if (GET_CODE (scan) == JUMP_INSN)
3896 if (! reg_mentioned_p (reg, scan))
3899 if (noncall_uses_reg (reg, scan, &scanset))
3906 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3908 /* There is a function call to this register other
3909 than the one we are checking. If we optimize
3910 this call, we need to rescan again below. */
3914 /* ??? We shouldn't have to worry about SCANSET here.
3915 We should just be able to check for a REG_DEAD note
3916 on a function call. However, the REG_DEAD notes are
3917 apparently not dependable around libcalls; c-torture
3918 execute/920501-2 is a test case. If SCANSET is set,
3919 then this insn sets the register, so it must have
3920 died earlier. Unfortunately, this will only handle
3921 the cases in which the register is, in fact, set in a libcall.  */
3924 /* ??? We shouldn't have to use FOUNDINSN here.
3925 However, the LOG_LINKS fields are apparently not
3926 entirely reliable around libcalls;
3927 newlib/libm/math/e_pow.c is a test case. Sometimes
3928 an insn will appear in LOG_LINKS even though it is
3929 not the most recent insn which sets the register. */
3933 || find_reg_note (scan, REG_DEAD, reg)))
3942 /* Either there was a branch, or some insn used REG
3943 other than as a function call address. */
3947 /* Create a code label, and put it in a REG_LABEL note on
3948 the insn which sets the register, and on each call insn
3949 which uses the register. In final_prescan_insn we look
3950 for the REG_LABEL notes, and output the appropriate label or pseudo-op.  */
3953 label = gen_label_rtx ();
3954 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3956 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3965 scan = NEXT_INSN (scan);
3967 && ((GET_CODE (scan) == CALL_INSN
3968 && reg_mentioned_p (reg, scan))
3969 || ((reg2 = sfunc_uses_reg (scan))
3970 && REGNO (reg2) == REGNO (reg))))
3972 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3974 while (scan != dies);
3980 fixup_addr_diff_vecs (first);
3984 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3985 shorten_branches (first);
3987 /* Scan the function looking for move instructions which have to be
3988 changed to pc-relative loads and insert the literal tables. */
3990 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3991 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3998 else if (GET_CODE (insn) == JUMP_INSN
3999 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4007 /* Some code might have been inserted between the mova and
4008 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4009 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4010 total += get_attr_length (scan);
4012 /* range of mova is 1020, add 4 because pc counts from address of
4013 second instruction after this one, subtract 2 in case pc is 2
4014 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4015 cancels out with alignment effects of the mova itself. */
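/* Concretely (illustrative arithmetic): the mova stays usable while
   the accumulated length is at most 1020 + 4 - 2 == 1022 bytes.  */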
4018 /* Change the mova into a load, and restart scanning
4019 there. broken_move will then return true for mova. */
4020 SET_SRC (PATTERN (mova))
4021 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4022 INSN_CODE (mova) = -1;
4026 if (broken_move (insn))
4029 /* Scan ahead looking for a barrier to stick the constant table behind.  */
4031 rtx barrier = find_barrier (num_mova, mova, insn);
4032 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4034 if (num_mova && ! mova_p (mova))
4036 /* find_barrier had to change the first mova into a
4037 pcload; thus, we have to start with this new pcload. */
4041 /* Now find all the moves between the points and modify them. */
4042 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4044 if (GET_CODE (scan) == CODE_LABEL)
4046 if (broken_move (scan))
4048 rtx *patp = &PATTERN (scan), pat = *patp;
4052 enum machine_mode mode;
4054 if (GET_CODE (pat) == PARALLEL)
4055 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4056 src = SET_SRC (pat);
4057 dst = SET_DEST (pat);
4058 mode = GET_MODE (dst);
4060 if (mode == SImode && hi_const (src)
4061 && REGNO (dst) != FPUL_REG)
4066 while (GET_CODE (dst) == SUBREG)
4068 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4069 GET_MODE (SUBREG_REG (dst)),
4072 dst = SUBREG_REG (dst);
4074 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4077 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4079 /* This must be an insn that clobbers r0. */
4080 rtx clobber = XVECEXP (PATTERN (scan), 0,
4081 XVECLEN (PATTERN (scan), 0) - 1);
4083 if (GET_CODE (clobber) != CLOBBER
4084 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4088 && reg_set_between_p (r0_rtx, last_float_move, scan))
4092 && GET_MODE_SIZE (mode) != 4
4093 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4095 lab = add_constant (src, mode, last_float);
4097 emit_insn_before (gen_mova (lab), scan);
4100 /* There will be a REG_UNUSED note for r0 on
4101 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4102 lest reorg:mark_target_live_regs fail to
4103 consider r0 to be used, and we end up with a delay
4104 slot insn in front of SCAN that clobbers r0.
4106 = find_regno_note (last_float_move, REG_UNUSED, 0);
4108 /* If we are not optimizing, then there may not be a note.  */
4111 PUT_MODE (note, REG_INC);
4113 *last_float_addr = r0_inc_rtx;
4115 last_float_move = scan;
4117 newsrc = gen_rtx (MEM, mode,
4118 (((TARGET_SH4 && ! TARGET_FMOVD)
4119 || REGNO (dst) == FPUL_REG)
4122 last_float_addr = &XEXP (newsrc, 0);
4124 /* Remove the clobber of r0. */
4125 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4126 RTX_UNCHANGING_P (newsrc) = 1;
4128 /* This is a mova needing a label. Create it. */
4129 else if (GET_CODE (src) == UNSPEC
4130 && XINT (src, 1) == UNSPEC_MOVA
4131 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4133 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4134 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4135 newsrc = gen_rtx_UNSPEC (SImode,
4136 gen_rtvec (1, newsrc),
4141 lab = add_constant (src, mode, 0);
4142 newsrc = gen_rtx_MEM (mode,
4143 gen_rtx_LABEL_REF (VOIDmode, lab));
4144 RTX_UNCHANGING_P (newsrc) = 1;
4146 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4147 INSN_CODE (scan) = -1;
4150 dump_table (barrier);
4155 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4156 INSN_ADDRESSES_FREE ();
4157 split_branches (first);
4159 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4160 also has an effect on the register that holds the address of the sfunc.
4161 Insert an extra dummy insn in front of each sfunc that pretends to
4162 use this register. */
4163 if (flag_delayed_branch)
4165 for (insn = first; insn; insn = NEXT_INSN (insn))
4167 rtx reg = sfunc_uses_reg (insn);
4171 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4175 /* fpscr is not actually a user variable, but we pretend it is for the
4176 sake of the previous optimization passes, since we want it handled like
4177 one. However, we don't have any debugging information for it, so turn
4178 it into a non-user variable now. */
4180 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4182 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4186 get_dest_uid (label, max_uid)
4190 rtx dest = next_real_insn (label);
4193 /* This can happen for an undefined label. */
4195 dest_uid = INSN_UID (dest);
4196 /* If this is a newly created branch redirection blocking instruction,
4197 we cannot index the branch_uid or insn_addresses arrays with its
4198 uid. But then, we won't need to, because the actual destination is
4199 the following branch. */
4200 while (dest_uid >= max_uid)
4202 dest = NEXT_INSN (dest);
4203 dest_uid = INSN_UID (dest);
4205 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4210 /* Split condbranches that are out of range. Also add clobbers for
4211 scratch registers that are needed in far jumps.
4212 We do this before delay slot scheduling, so that it can take our
4213 newly created instructions into account. It also allows us to
4214 find branches with common targets more easily. */
4217 split_branches (first)
4221 struct far_branch **uid_branch, *far_branch_list = 0;
4222 int max_uid = get_max_uid ();
4224 /* Find out which branches are out of range. */
4225 shorten_branches (first);
4227 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4228 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4230 for (insn = first; insn; insn = NEXT_INSN (insn))
4231 if (! INSN_P (insn))
4233 else if (INSN_DELETED_P (insn))
4235 /* Shorten_branches would split this instruction again,
4236 so transform it into a note. */
4237 PUT_CODE (insn, NOTE);
4238 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4239 NOTE_SOURCE_FILE (insn) = 0;
4241 else if (GET_CODE (insn) == JUMP_INSN
4242 /* Don't mess with ADDR_DIFF_VEC.  */
4243 && (GET_CODE (PATTERN (insn)) == SET
4244 || GET_CODE (PATTERN (insn)) == RETURN))
4246 enum attr_type type = get_attr_type (insn);
4247 if (type == TYPE_CBRANCH)
4251 if (get_attr_length (insn) > 4)
4253 rtx src = SET_SRC (PATTERN (insn));
4254 rtx olabel = XEXP (XEXP (src, 1), 0);
4255 int addr = INSN_ADDRESSES (INSN_UID (insn));
4257 int dest_uid = get_dest_uid (olabel, max_uid);
4258 struct far_branch *bp = uid_branch[dest_uid];
4260 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4261 the label if the LABEL_NUSES count drops to zero. There is
4262 always a jump_optimize pass that sets these values, but it
4263 proceeds to delete unreferenced code, and then if not
4264 optimizing, to un-delete the deleted instructions, thus
4265 leaving labels with use counts that are too low.
4268 JUMP_LABEL (insn) = olabel;
4269 LABEL_NUSES (olabel)++;
4273 bp = (struct far_branch *) alloca (sizeof *bp);
4274 uid_branch[dest_uid] = bp;
4275 bp->prev = far_branch_list;
4276 far_branch_list = bp;
4278 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4279 LABEL_NUSES (bp->far_label)++;
4283 label = bp->near_label;
4284 if (! label && bp->address - addr >= CONDJUMP_MIN)
4286 rtx block = bp->insert_place;
4288 if (GET_CODE (PATTERN (block)) == RETURN)
4289 block = PREV_INSN (block);
4291 block = gen_block_redirect (block,
4293 label = emit_label_after (gen_label_rtx (),
4295 bp->near_label = label;
4297 else if (label && ! NEXT_INSN (label))
4299 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4300 bp->insert_place = insn;
4302 gen_far_branch (bp);
4306 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4308 bp->near_label = label = gen_label_rtx ();
4309 bp->insert_place = insn;
4312 if (! redirect_jump (insn, label, 1))
4317 /* get_attr_length (insn) == 2 */
4318 /* Check if we have a pattern where reorg wants to redirect
4319 the branch to a label from an unconditional branch that is too far away.  */
4321 /* We can't use JUMP_LABEL here because it might be undefined
4322 when not optimizing. */
4323 /* A syntax error might cause beyond to be NULL_RTX. */
4325 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4329 && (GET_CODE (beyond) == JUMP_INSN
4330 || ((beyond = next_active_insn (beyond))
4331 && GET_CODE (beyond) == JUMP_INSN))
4332 && GET_CODE (PATTERN (beyond)) == SET
4333 && recog_memoized (beyond) == CODE_FOR_jump
4335 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4336 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4338 gen_block_redirect (beyond,
4339 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4342 next = next_active_insn (insn);
4344 if ((GET_CODE (next) == JUMP_INSN
4345 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4346 && GET_CODE (PATTERN (next)) == SET
4347 && recog_memoized (next) == CODE_FOR_jump
4349 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4350 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4352 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4354 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4356 int addr = INSN_ADDRESSES (INSN_UID (insn));
4359 struct far_branch *bp;
4361 if (type == TYPE_JUMP)
4363 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4364 dest_uid = get_dest_uid (far_label, max_uid);
4367 /* Parse errors can lead to labels outside the insn stream.  */
4369 if (! NEXT_INSN (far_label))
4374 JUMP_LABEL (insn) = far_label;
4375 LABEL_NUSES (far_label)++;
4377 redirect_jump (insn, NULL_RTX, 1);
4381 bp = uid_branch[dest_uid];
4384 bp = (struct far_branch *) alloca (sizeof *bp);
4385 uid_branch[dest_uid] = bp;
4386 bp->prev = far_branch_list;
4387 far_branch_list = bp;
4389 bp->far_label = far_label;
4391 LABEL_NUSES (far_label)++;
4393 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4394 if (addr - bp->address <= CONDJUMP_MAX)
4395 emit_label_after (bp->near_label, PREV_INSN (insn));
4398 gen_far_branch (bp);
4404 bp->insert_place = insn;
4406 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4408 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4411 /* Generate all pending far branches,
4412 and free our references to the far labels. */
4413 while (far_branch_list)
4415 if (far_branch_list->near_label
4416 && ! NEXT_INSN (far_branch_list->near_label))
4417 gen_far_branch (far_branch_list);
4419 && far_branch_list->far_label
4420 && ! --LABEL_NUSES (far_branch_list->far_label))
4421 delete_insn (far_branch_list->far_label);
4422 far_branch_list = far_branch_list->prev;
4425 /* Instruction length information is no longer valid due to the new
4426 instructions that have been generated. */
4427 init_insn_lengths ();
4430 /* Dump out instruction addresses, which is useful for debugging the
4431 constant pool table stuff.
4433 If relaxing, output the label and pseudo-ops used to link together
4434 calls and the instructions which set the registers.
4436 /* ??? The addresses printed by this routine for insns are nonsense for
4437 insns which are inside a sequence where none of the inner insns have
4438 variable length. This is because the second pass of shorten_branches
4439 does not bother to update them. */
4442 final_prescan_insn (insn, opvec, noperands)
4444 rtx *opvec ATTRIBUTE_UNUSED;
4445 int noperands ATTRIBUTE_UNUSED;
4447 if (TARGET_DUMPISIZE)
4448 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4454 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4459 pattern = PATTERN (insn);
4460 if (GET_CODE (pattern) == PARALLEL)
4461 pattern = XVECEXP (pattern, 0, 0);
4462 if (GET_CODE (pattern) == CALL
4463 || (GET_CODE (pattern) == SET
4464 && (GET_CODE (SET_SRC (pattern)) == CALL
4465 || get_attr_type (insn) == TYPE_SFUNC)))
4466 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4467 CODE_LABEL_NUMBER (XEXP (note, 0)));
4468 else if (GET_CODE (pattern) == SET)
4469 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4470 CODE_LABEL_NUMBER (XEXP (note, 0)));
4477 /* Dump out any constants accumulated in the final pass.  These will only be labels.  */
4481 output_jump_label_table ()
4487 fprintf (asm_out_file, "\t.align 2\n");
4488 for (i = 0; i < pool_size; i++)
4490 pool_node *p = &pool_vector[i];
4492 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4493 CODE_LABEL_NUMBER (p->label));
4494 output_asm_insn (".long %O0", &p->value);
4502 /* A full frame looks like:
4506 [ if current_function_anonymous_args
4519 local-0 <- fp points here. */
4521 /* Number of bytes pushed for anonymous args, used to pass information
4522 between expand_prologue and expand_epilogue. */
4524 static int extra_push;
4526 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4527 to be adjusted, and TEMP, if nonnegative, holds the register number
4528 of a general register that we may clobber. */
4531 output_stack_adjust (size, reg, temp, emit_fn)
4535 rtx (*emit_fn) PARAMS ((rtx));
4539 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4544 if (CONST_OK_FOR_ADD (size))
4545 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4546 /* Try to do it with two partial adjustments; however, we must make
4547 sure that the stack is properly aligned at all times, in case
4548 an interrupt occurs between the two partial adjustments. */
4549 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4550 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4552 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4553 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
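/* Worked example (illustrative values, assuming an 8-bit signed add
   immediate and align == 8): for size == 200, the first add is
   200/2 & -8 == 96 and the second is 200 - 96 == 104; both fit the
   immediate range, and the stack pointer stays 8-byte aligned between
   the two adjustments.  */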
4560 /* If TEMP is invalid, we could temporarily save a general
4561 register to MACL. However, there is currently no need
4562 to handle this case, so just abort when we see it. */
4565 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4567 /* If SIZE is negative, subtract the positive value.
4568 This sometimes allows a constant pool entry to be shared
4569 between prologue and epilogue code. */
4572 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4573 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4577 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4578 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4580 if (emit_fn == frame_insn)
4582 = (gen_rtx_EXPR_LIST
4583 (REG_FRAME_RELATED_EXPR,
4584 gen_rtx_SET (VOIDmode, reg,
4585 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4596 RTX_FRAME_RELATED_P (x) = 1;
4600 /* Output RTL to push register RN onto the stack. */
4608 x = gen_push_fpul ();
4609 else if (rn == FPSCR_REG)
4610 x = gen_push_fpscr ();
4611 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4612 && FP_OR_XD_REGISTER_P (rn))
4614 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4616 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4618 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4619 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4621 x = gen_push (gen_rtx_REG (SImode, rn));
4625 = gen_rtx_EXPR_LIST (REG_INC,
4626 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4630 /* Output RTL to pop register RN from the stack. */
4638 x = gen_pop_fpul ();
4639 else if (rn == FPSCR_REG)
4640 x = gen_pop_fpscr ();
4641 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4642 && FP_OR_XD_REGISTER_P (rn))
4644 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4646 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4648 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4649 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4651 x = gen_pop (gen_rtx_REG (SImode, rn));
4655 = gen_rtx_EXPR_LIST (REG_INC,
4656 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4659 /* Generate code to push the regs specified in the mask. */
4662 push_regs (mask, interrupt_handler)
4664 int interrupt_handler;
4669 /* Push PR last; this gives better latencies after the prologue, and
4670 candidates for the return delay slot when there are no general
4671 registers pushed. */
4672 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4674 /* If this is an interrupt handler, and the SZ bit varies,
4675 and we have to push any floating point register, we need
4676 to switch to the correct precision first. */
4677 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4678 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4680 HARD_REG_SET unsaved;
4683 COMPL_HARD_REG_SET(unsaved, *mask);
4684 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4688 && (i != FPSCR_REG || ! skip_fpscr)
4689 && TEST_HARD_REG_BIT (*mask, i))
4692 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4696 /* Work out the registers which need to be saved, both as a mask and a
4697 count of saved words. Return the count.
4699 If doing a pragma interrupt function, then push all regs used by the
4700 function, and if we call another function (we can tell by looking at PR),
4701 make sure that all the regs it clobbers are safe too. */
4704 calc_live_regs (live_regs_mask)
4705 HARD_REG_SET *live_regs_mask;
4709 int interrupt_handler;
4712 interrupt_handler = sh_cfun_interrupt_handler_p ();
4714 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4715 CLEAR_HARD_REG_SET (*live_regs_mask);
4716 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4717 && regs_ever_live[FPSCR_REG])
4718 target_flags &= ~FPU_SINGLE_BIT;
4719 /* If we can avoid a lot of saves by switching to double mode, do that. */
4720 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4721 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4722 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4723 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4726 target_flags &= ~FPU_SINGLE_BIT;
4729 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4730 knows how to use it. That means the pseudo originally allocated for
4731 the initial value can become the PR_MEDIA_REG hard register, as seen for
4732 execute/20010122-1.c:test9. */
4734 pr_live = regs_ever_live[PR_MEDIA_REG];
4737 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4738 pr_live = (pr_initial
4739 ? (GET_CODE (pr_initial) != REG
4740 || REGNO (pr_initial) != (PR_REG))
4741 : regs_ever_live[PR_REG]);
4743 /* Force PR to be live if the prologue has to call the SHmedia
4744 argument decoder or register saver. */
4745 if (TARGET_SHCOMPACT
4746 && ((current_function_args_info.call_cookie
4747 & ~ CALL_COOKIE_RET_TRAMP (1))
4748 || current_function_has_nonlocal_label))
4750 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4752 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4754 : (interrupt_handler && ! pragma_trapa)
4755 ? (/* Need to save all the regs ever live. */
4756 (regs_ever_live[reg]
4757 || (call_used_regs[reg]
4758 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4760 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4761 && reg != RETURN_ADDRESS_POINTER_REGNUM
4762 && reg != T_REG && reg != GBR_REG
4763 /* Push fpscr only on targets which have an FPU.  */
4764 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4765 : (/* Only push those regs which are used and need to be saved. */
4768 && current_function_args_info.call_cookie
4769 && reg == PIC_OFFSET_TABLE_REGNUM)
4770 || (regs_ever_live[reg] && ! call_used_regs[reg])
4771 || (current_function_calls_eh_return
4772 && (reg == EH_RETURN_DATA_REGNO (0)
4773 || reg == EH_RETURN_DATA_REGNO (1)
4774 || reg == EH_RETURN_DATA_REGNO (2)
4775 || reg == EH_RETURN_DATA_REGNO (3)))))
4777 SET_HARD_REG_BIT (*live_regs_mask, reg);
4778 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4780 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4781 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4783 if (FP_REGISTER_P (reg))
4785 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4787 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4788 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4791 else if (XD_REGISTER_P (reg))
4793 /* Must switch to double mode to access these registers. */
4794 target_flags &= ~FPU_SINGLE_BIT;
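/* A minimal usage sketch for calc_live_regs (hypothetical caller):

     HARD_REG_SET mask;
     int d = calc_live_regs (&mask);

   D is the number of bytes of save area needed (the sum of
   GET_MODE_SIZE over the saved registers); MASK has a bit set for
   each hard register that must be saved.  The prologue and epilogue
   expanders below rely on getting the same answer from this
   function.  */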
4803 /* Code to generate prologue and epilogue sequences */
4805 /* PUSHED is the number of bytes that are being pushed on the
4806 stack for register saves. Return the frame size, padded
4807 appropriately so that the stack stays properly aligned. */
4808 static HOST_WIDE_INT
4809 rounded_frame_size (pushed)
4812 HOST_WIDE_INT size = get_frame_size ();
4813 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4815 return ((size + pushed + align - 1) & -align) - pushed;
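/* For instance, with PUSHED == 6, a 13-byte frame, and STACK_BOUNDARY
   == 64 (so align == 8), the result is ((13 + 6 + 7) & -8) - 6 == 18;
   the 6 pushed bytes plus the 18-byte frame keep the stack 8-byte
   aligned.  (Illustrative values only.)  */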
4818 /* Choose a call-clobbered target-branch register that remains
4819 unchanged along the whole function. We set it up as the return
4820 value in the prologue. */
4822 sh_media_register_for_return ()
4827 if (! current_function_is_leaf)
4830 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4832 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4833 if (call_used_regs[regno] && ! regs_ever_live[regno])
4840 sh_expand_prologue ()
4842 HARD_REG_SET live_regs_mask;
4845 int save_flags = target_flags;
4847 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4849 /* We have pretend args if we had an object sent partially in registers
4850 and partially on the stack, e.g. a large structure. */
4851 output_stack_adjust (-current_function_pretend_args_size
4852 - current_function_args_info.stack_regs * 8,
4853 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4857 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4858 /* We're going to use the PIC register to load the address of the
4859 incoming-argument decoder and/or of the return trampoline from
4860 the GOT, so make sure the PIC register is preserved and initialized. */
4862 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4864 if (TARGET_SHCOMPACT
4865 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4869 /* First, make all registers with incoming arguments that will
4870 be pushed onto the stack live, so that register renaming
4871 doesn't overwrite them. */
4872 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4873 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4874 >= NPARM_REGS (SImode) - reg)
4875 for (; reg < NPARM_REGS (SImode); reg++)
4876 emit_insn (gen_shcompact_preserve_incoming_args
4877 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4878 else if (CALL_COOKIE_INT_REG_GET
4879 (current_function_args_info.call_cookie, reg) == 1)
4880 emit_insn (gen_shcompact_preserve_incoming_args
4881 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4883 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4885 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4886 GEN_INT (current_function_args_info.call_cookie));
4887 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4888 gen_rtx_REG (SImode, R0_REG));
4890 else if (TARGET_SHMEDIA)
4892 int tr = sh_media_register_for_return ();
4896 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4897 gen_rtx_REG (DImode, PR_MEDIA_REG));
4899 /* If this function only exits with sibcalls, this copy
4900 will be flagged as dead. */
4901 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4907 /* Emit the code for SETUP_VARARGS. */
4908 if (current_function_stdarg)
4910 /* This is not used by the SH2E calling convention.  */
4911 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4913 /* Push arg regs as if they'd been provided by the caller on the stack. */
4914 for (i = 0; i < NPARM_REGS(SImode); i++)
4916 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4919 if (i >= (NPARM_REGS(SImode)
4920 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4924 RTX_FRAME_RELATED_P (insn) = 0;
4930 /* If we're supposed to switch stacks at function entry, do so now. */
4932 emit_insn (gen_sp_switch_1 ());
4934 d = calc_live_regs (&live_regs_mask);
4935 /* ??? Maybe we could save some switching if we can move a mode switch
4936 that already happens to be at the function start into the prologue. */
4937 if (target_flags != save_flags && ! current_function_interrupt)
4938 emit_insn (gen_toggle_sz ());
4945 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4946 int offset_in_r0 = -1;
4949 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4950 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4951 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4953 offset = d + d_rounding;
4954 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4956 /* We loop twice: first, we save the 8-byte aligned registers in the
4957 higher addresses, which are known to be aligned.  Then, we
4958 proceed to saving the 32-bit registers that don't need 8-byte alignment. */
4960 /* Note that if you change this code in a way that affects where
4961 the return register is saved, you have to update not only
4962 sh_expand_epilogue, but also sh_set_return_address. */
4963 for (align = 1; align >= 0; align--)
4964 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4965 if (TEST_HARD_REG_BIT (live_regs_mask, i))
4967 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4969 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4971 if (mode == SFmode && (i % 2) == 1
4972 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4973 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
4980 /* If we're doing the aligned pass and this is not aligned,
4981 or we're doing the unaligned pass and this is aligned, skip it. */
4983 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4987 offset -= GET_MODE_SIZE (mode);
4989 reg_rtx = gen_rtx_REG (mode, reg);
4991 mem_rtx = gen_rtx_MEM (mode,
4992 gen_rtx_PLUS (Pmode,
4996 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5002 if (HAVE_PRE_DECREMENT
5003 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5004 || mem_rtx == NULL_RTX
5005 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5007 pre_dec = gen_rtx_MEM (mode,
5008 gen_rtx_PRE_DEC (Pmode, r0));
5010 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5019 offset += GET_MODE_SIZE (mode);
5023 if (mem_rtx != NULL_RTX)
5026 if (offset_in_r0 == -1)
5028 emit_move_insn (r0, GEN_INT (offset));
5029 offset_in_r0 = offset;
5031 else if (offset != offset_in_r0)
5036 GEN_INT (offset - offset_in_r0)));
5037 offset_in_r0 += offset - offset_in_r0;
5040 if (pre_dec != NULL_RTX)
5046 (Pmode, r0, stack_pointer_rtx));
5050 offset -= GET_MODE_SIZE (mode);
5051 offset_in_r0 -= GET_MODE_SIZE (mode);
5056 mem_rtx = gen_rtx_MEM (mode, r0);
5058 mem_rtx = gen_rtx_MEM (mode,
5059 gen_rtx_PLUS (Pmode,
5063 /* We must not use an r0-based address for target-branch
5064 registers or for special registers without pre-dec
5065 memory addresses, since we store their values in r0
5067 if (TARGET_REGISTER_P (i)
5068 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5069 && mem_rtx != pre_dec))
5073 if (TARGET_REGISTER_P (i)
5074 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5075 && mem_rtx != pre_dec))
5077 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5079 emit_move_insn (r0mode, reg_rtx);
5087 emit_move_insn (mem_rtx, reg_rtx);
5090 if (offset != d_rounding)
5094 push_regs (&live_regs_mask, current_function_interrupt);
5096 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5098 rtx insn = get_last_insn ();
5099 rtx last = emit_insn (gen_GOTaddr2picreg ());
5101 /* Mark these insns as possibly dead. Sometimes, flow2 may
5102 delete all uses of the PIC register. In this case, let it
5103 delete the initialization too. */
5106 insn = NEXT_INSN (insn);
5108 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5112 while (insn != last);
5115 if (SHMEDIA_REGS_STACK_ADJUST ())
5117 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5118 gen_rtx_SYMBOL_REF (Pmode,
5120 ? "__GCC_push_shmedia_regs"
5121 : "__GCC_push_shmedia_regs_nofpu"));
5122 /* This must NOT go through the PLT, otherwise mach and macl
5123 may be clobbered. */
5124 emit_insn (gen_shmedia_save_restore_regs_compact
5125 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5128 if (target_flags != save_flags && ! current_function_interrupt)
5130 rtx insn = emit_insn (gen_toggle_sz ());
5132 /* If we're lucky, a mode switch in the function body will
5133 overwrite fpscr, turning this insn dead. Tell flow this
5134 insn is ok to delete. */
5135 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5140 target_flags = save_flags;
5142 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5143 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5145 if (frame_pointer_needed)
5146 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5148 if (TARGET_SHCOMPACT
5149 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5151 /* This must NOT go through the PLT, otherwise mach and macl
5152 may be clobbered. */
5153 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5154 gen_rtx_SYMBOL_REF (Pmode,
5155 "__GCC_shcompact_incoming_args"));
5156 emit_insn (gen_shcompact_incoming_args ());
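/* Illustrative only: for a minimal SH-4 function such as

     int f (int x) { return g (x) + 1; }

   the prologue expanded above reduces to roughly

     sts.l  pr,@-r15     ! push_regs: PR, the only save needed

   since there are no pretend args, no varargs saves, and a frame that
   rounds to zero.  */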
5161 sh_expand_epilogue ()
5163 HARD_REG_SET live_regs_mask;
5167 int save_flags = target_flags;
5169 int fpscr_deferred = 0;
5171 d = calc_live_regs (&live_regs_mask);
5173 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5174 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5175 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5177 frame_size = rounded_frame_size (d) - d_rounding;
5179 if (frame_pointer_needed)
5181 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5183 /* We must avoid moving the stack pointer adjustment past code
5184 which reads from the local frame, else an interrupt could
5185 occur after the SP adjustment and clobber data in the local frame. */
5187 emit_insn (gen_blockage ());
5188 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5190 else if (frame_size)
5192 /* We must avoid moving the stack pointer adjustment past code
5193 which reads from the local frame, else an interrupt could
5194 occur after the SP adjustment and clobber data in the local frame. */
5196 emit_insn (gen_blockage ());
5197 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5200 if (SHMEDIA_REGS_STACK_ADJUST ())
5202 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5203 gen_rtx_SYMBOL_REF (Pmode,
5205 ? "__GCC_pop_shmedia_regs"
5206 : "__GCC_pop_shmedia_regs_nofpu"));
5207 /* This must NOT go through the PLT, otherwise mach and macl
5208 may be clobbered. */
5209 emit_insn (gen_shmedia_save_restore_regs_compact
5210 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5213 /* Pop all the registers. */
5215 if (target_flags != save_flags && ! current_function_interrupt)
5216 emit_insn (gen_toggle_sz ());
5219 int offset = d_rounding;
5220 int offset_in_r0 = -1;
5223 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5224 int tmp_regno = R20_REG;
5226 /* We loop twice: first, we restore the 8-byte aligned registers from
5227 the higher addresses, which are known to be aligned.  Then, we
5228 proceed to restoring the 32-bit registers that don't need 8-byte alignment. */
5230 for (align = 0; align <= 1; align++)
5231 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5232 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5234 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5236 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5238 if (mode == SFmode && (i % 2) == 0
5239 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5240 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5246 /* If we're doing the aligned pass and this is not aligned,
5247 or we're doing the unaligned pass and this is aligned, skip it. */
5249 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5253 reg_rtx = gen_rtx_REG (mode, reg);
5255 mem_rtx = gen_rtx_MEM (mode,
5256 gen_rtx_PLUS (Pmode,
5260 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5266 if (HAVE_POST_INCREMENT
5267 && (offset == offset_in_r0
5268 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5269 && mem_rtx == NULL_RTX)
5270 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5272 post_inc = gen_rtx_MEM (mode,
5273 gen_rtx_POST_INC (Pmode, r0));
5275 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5278 post_inc = NULL_RTX;
5287 if (mem_rtx != NULL_RTX)
5290 if (offset_in_r0 == -1)
5292 emit_move_insn (r0, GEN_INT (offset));
5293 offset_in_r0 = offset;
5295 else if (offset != offset_in_r0)
5300 GEN_INT (offset - offset_in_r0)));
5301 offset_in_r0 += offset - offset_in_r0;
5304 if (post_inc != NULL_RTX)
5310 (Pmode, r0, stack_pointer_rtx));
5316 offset_in_r0 += GET_MODE_SIZE (mode);
5319 mem_rtx = gen_rtx_MEM (mode, r0);
5321 mem_rtx = gen_rtx_MEM (mode,
5322 gen_rtx_PLUS (Pmode,
5326 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5327 && mem_rtx != post_inc)
5331 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5332 && mem_rtx != post_inc)
5334 insn = emit_move_insn (r0, mem_rtx);
5337 else if (TARGET_REGISTER_P (i))
5339 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5341 /* Give the scheduler a bit of freedom by using R20..R23
5342 in a round-robin fashion. Don't use R1 here because
5343 we want to use it for EH_RETURN_STACKADJ_RTX. */
5344 insn = emit_move_insn (tmp_reg, mem_rtx);
5346 if (++tmp_regno > R23_REG)
5347 tmp_regno = R20_REG;
5350 insn = emit_move_insn (reg_rtx, mem_rtx);
5352 offset += GET_MODE_SIZE (mode);
5355 if (offset != d + d_rounding)
5362 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5364 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5366 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5368 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5369 && hard_regs_intersect_p (&live_regs_mask,
5370 &reg_class_contents[DF_REGS]))
5372 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5374 if (j == FIRST_FP_REG && fpscr_deferred)
5378 if (target_flags != save_flags && ! current_function_interrupt)
5379 emit_insn (gen_toggle_sz ());
5380 target_flags = save_flags;
5382 output_stack_adjust (extra_push + current_function_pretend_args_size
5384 + current_function_args_info.stack_regs * 8,
5385 stack_pointer_rtx, 7, emit_insn);
5387 if (current_function_calls_eh_return)
5388 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5389 EH_RETURN_STACKADJ_RTX));
5391 /* Switch back to the normal stack if necessary. */
5393 emit_insn (gen_sp_switch_2 ());
5395 /* Tell flow the insn that pops PR isn't dead. */
5396 /* PR_REG will never be live in SHmedia mode, and we don't need to
5397 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5398 by the return pattern. */
5399 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5400 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
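/* For the same minimal example, the matching epilogue is roughly

     lds.l  @r15+,pr
     rts
     nop

   and the USE of PR_REG emitted above keeps flow from deleting the
   pop as dead.  (Illustrative only.)  */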
5403 static int sh_need_epilogue_known = 0;
5408 if (! sh_need_epilogue_known)
5413 sh_expand_epilogue ();
5414 epilogue = get_insns ();
5416 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5418 return sh_need_epilogue_known > 0;
5421 /* Emit code to change the current function's return address to RA.
5422 TEMP is available as a scratch register, if needed. */
5425 sh_set_return_address (ra, tmp)
5428 HARD_REG_SET live_regs_mask;
5431 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5434 d = calc_live_regs (&live_regs_mask);
5436 /* If pr_reg isn't live, we can set it (or the register given in
5437 sh_media_register_for_return) directly. */
5438 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5444 int rr_regno = sh_media_register_for_return ();
5449 rr = gen_rtx_REG (DImode, rr_regno);
5452 rr = gen_rtx_REG (SImode, pr_reg);
5454 emit_insn (GEN_MOV (rr, ra));
5455 /* Tell flow the register for return isn't dead. */
5456 emit_insn (gen_rtx_USE (VOIDmode, rr));
5466 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5467 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5468 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5472 /* We loop twice: first, we account for the 8-byte aligned registers
5473 in the higher addresses, which are known to be aligned.  Then, we
5474 proceed to the 32-bit registers that don't need 8-byte alignment. */
5476 for (align = 0; align <= 1; align++)
5477 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5478 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5480 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5482 if (mode == SFmode && (i % 2) == 0
5483 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5484 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5490 /* If we're doing the aligned pass and this is not aligned,
5491 or we're doing the unaligned pass and this is aligned, skip it. */
5493 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5500 offset += GET_MODE_SIZE (mode);
5503 /* We can't find the PR register. */
5507 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5508 + SHMEDIA_REGS_STACK_ADJUST ());
5511 pr_offset = rounded_frame_size (d) - d_rounding;
5513 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5514 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5516 tmp = gen_rtx_MEM (Pmode, tmp);
5517 emit_insn (GEN_MOV (tmp, ra));
5520 /* Clear variables at function end. */
5523 sh_output_function_epilogue (file, size)
5524 FILE *file ATTRIBUTE_UNUSED;
5525 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5527 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5528 sh_need_epilogue_known = 0;
5529 sp_switch = NULL_RTX;
5533 sh_builtin_saveregs ()
5535 /* First unnamed integer register. */
5536 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5537 /* Number of integer registers we need to save. */
5538 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5539 /* First unnamed SFmode float reg */
5540 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5541 /* Number of SFmode float regs to save. */
5542 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5545 HOST_WIDE_INT alias_set;
5551 int pushregs = n_intregs;
5553 while (pushregs < NPARM_REGS (SImode) - 1
5554 && (CALL_COOKIE_INT_REG_GET
5555 (current_function_args_info.call_cookie,
5556 NPARM_REGS (SImode) - pushregs)
5559 current_function_args_info.call_cookie
5560 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5565 if (pushregs == NPARM_REGS (SImode))
5566 current_function_args_info.call_cookie
5567 |= (CALL_COOKIE_INT_REG (0, 1)
5568 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5570 current_function_args_info.call_cookie
5571 |= CALL_COOKIE_STACKSEQ (pushregs);
5573 current_function_pretend_args_size += 8 * n_intregs;
5575 if (TARGET_SHCOMPACT)
5579 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5581 error ("__builtin_saveregs not supported by this subtarget");
5588 /* Allocate block of memory for the regs. */
5589 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5590 Or can assign_stack_local accept a 0 SIZE argument? */
5591 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5594 regbuf = gen_rtx_MEM (BLKmode,
5595 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5596 else if (n_floatregs & 1)
5600 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5601 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5602 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5603 regbuf = change_address (regbuf, BLKmode, addr);
5606 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5607 alias_set = get_varargs_alias_set ();
5608 set_mem_alias_set (regbuf, alias_set);
5611 /* Save the integer argument registers.  This is optimized to only save
5612 the regs that are necessary; explicitly named args need not be saved. */
5614 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5615 adjust_address (regbuf, BLKmode,
5616 n_floatregs * UNITS_PER_WORD),
5617 n_intregs, n_intregs * UNITS_PER_WORD);
5620 /* Return the address of the regbuf. */
5621 return XEXP (regbuf, 0);
5624 /* Save the float argument registers.  This is optimized to only save
5625 the regs that are necessary; explicitly named args need not be saved.
5626 We explicitly build a pointer to the buffer because it halves the insn
5627 count when not optimizing (otherwise the pointer is built for each reg saved).
5629 We emit the moves in reverse order so that we can use predecrement. */
5631 fpregs = gen_reg_rtx (Pmode);
5632 emit_move_insn (fpregs, XEXP (regbuf, 0));
5633 emit_insn (gen_addsi3 (fpregs, fpregs,
5634 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5638 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5640 emit_insn (gen_addsi3 (fpregs, fpregs,
5641 GEN_INT (-2 * UNITS_PER_WORD)));
5642 mem = gen_rtx_MEM (DFmode, fpregs);
5643 set_mem_alias_set (mem, alias_set);
5644 emit_move_insn (mem,
5645 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5647 regno = first_floatreg;
5650 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5651 mem = gen_rtx_MEM (SFmode, fpregs);
5652 set_mem_alias_set (mem, alias_set);
5653 emit_move_insn (mem,
5654 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5655 - (TARGET_LITTLE_ENDIAN != 0)));
5659 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5663 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5664 mem = gen_rtx_MEM (SFmode, fpregs);
5665 set_mem_alias_set (mem, alias_set);
5666 emit_move_insn (mem,
5667 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5670 /* Return the address of the regbuf. */
5671 return XEXP (regbuf, 0);
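/* Illustrative layout of the buffer returned above, for a
   little-endian SH-4 with 2 unnamed float args and 2 unnamed int
   args left to save (register numbers hypothetical):

     regbuf + 0 : fr11, fr10     SFmode saves, stored by the
                                 predecrement loop above
     regbuf + 8 : r6, r7         integer args, stored by
                                 move_block_from_reg

   The float block always starts the buffer; the ints follow at
   offset n_floatregs * UNITS_PER_WORD.  */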
5674 /* Define the `__builtin_va_list' type for the ABI. */
5679 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5682 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5683 return ptr_type_node;
5685 record = make_node (RECORD_TYPE);
5687 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5689 f_next_o_limit = build_decl (FIELD_DECL,
5690 get_identifier ("__va_next_o_limit"),
5692 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5694 f_next_fp_limit = build_decl (FIELD_DECL,
5695 get_identifier ("__va_next_fp_limit"),
5697 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5700 DECL_FIELD_CONTEXT (f_next_o) = record;
5701 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5702 DECL_FIELD_CONTEXT (f_next_fp) = record;
5703 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5704 DECL_FIELD_CONTEXT (f_next_stack) = record;
5706 TYPE_FIELDS (record) = f_next_o;
5707 TREE_CHAIN (f_next_o) = f_next_o_limit;
5708 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5709 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5710 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5712 layout_type (record);
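/* In C terms, the record built above is (sketch; all fields are
   generic pointers):

     struct __va_list_tag {
       void *__va_next_o;         next integer arg in the reg-save area
       void *__va_next_o_limit;   end of the integer part
       void *__va_next_fp;        next float arg in the reg-save area
       void *__va_next_fp_limit;  end of the float part
       void *__va_next_stack;     next argument passed on the stack
     };
*/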
5717 /* Implement `va_start' for varargs and stdarg. */
5720 sh_va_start (valist, nextarg)
5724 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5725 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5731 expand_builtin_saveregs ();
5732 std_expand_builtin_va_start (valist, nextarg);
5736 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5738 std_expand_builtin_va_start (valist, nextarg);
5742 f_next_o = TYPE_FIELDS (va_list_type_node);
5743 f_next_o_limit = TREE_CHAIN (f_next_o);
5744 f_next_fp = TREE_CHAIN (f_next_o_limit);
5745 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5746 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5748 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5749 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5750 valist, f_next_o_limit);
5751 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5752 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5753 valist, f_next_fp_limit);
5754 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5755 valist, f_next_stack);
5757 /* Call __builtin_saveregs. */
5758 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5759 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5760 TREE_SIDE_EFFECTS (t) = 1;
5761 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5763 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5768 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5769 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5770 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5771 TREE_SIDE_EFFECTS (t) = 1;
5772 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5774 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5775 TREE_SIDE_EFFECTS (t) = 1;
5776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5778 nint = current_function_args_info.arg_count[SH_ARG_INT];
5783 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5784 build_int_2 (UNITS_PER_WORD * nint, 0)));
5785 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5786 TREE_SIDE_EFFECTS (t) = 1;
5787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5789 u = make_tree (ptr_type_node, nextarg);
5790 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5791 TREE_SIDE_EFFECTS (t) = 1;
5792 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
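/* After sh_va_start, the fields are related as follows (sketch,
   following the assignments above):

     __va_next_fp       = __builtin_saveregs ()
     __va_next_fp_limit = __va_next_fp + UNITS_PER_WORD * nfp
     __va_next_o        = __va_next_fp_limit
     __va_next_o_limit  = __va_next_o + UNITS_PER_WORD * nint
     __va_next_stack    = nextarg
*/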
5795 /* Implement `va_arg'. */
5798 sh_va_arg (valist, type)
5801 HOST_WIDE_INT size, rsize;
5802 tree tmp, pptr_type_node;
5805 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5807 size = int_size_in_bytes (type);
5808 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5809 pptr_type_node = build_pointer_type (ptr_type_node);
5812 type = build_pointer_type (type);
5814 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5816 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5817 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5819 rtx lab_false, lab_over;
5821 f_next_o = TYPE_FIELDS (va_list_type_node);
5822 f_next_o_limit = TREE_CHAIN (f_next_o);
5823 f_next_fp = TREE_CHAIN (f_next_o_limit);
5824 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5825 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5827 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5828 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5829 valist, f_next_o_limit);
5830 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5832 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5833 valist, f_next_fp_limit);
5834 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5835 valist, f_next_stack);
5839 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5840 || (TREE_CODE (type) == COMPLEX_TYPE
5841 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5846 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5849 addr_rtx = gen_reg_rtx (Pmode);
5850 lab_false = gen_label_rtx ();
5851 lab_over = gen_label_rtx ();
5856 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5857 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5859 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5861 expand_expr (next_fp_limit, NULL_RTX,
5862 Pmode, EXPAND_NORMAL),
5863 GE, const1_rtx, Pmode, 1, lab_false);
5865 if (TYPE_ALIGN (type) > BITS_PER_WORD
5866 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5867 && (n_floatregs & 1)))
5869 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5870 build_int_2 (UNITS_PER_WORD, 0));
5871 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5872 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5873 TREE_SIDE_EFFECTS (tmp) = 1;
5874 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5877 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5878 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5880 emit_move_insn (addr_rtx, r);
5882 emit_jump_insn (gen_jump (lab_over));
5884 emit_label (lab_false);
5886 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5887 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5889 emit_move_insn (addr_rtx, r);
5893 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5894 build_int_2 (rsize, 0));
5896 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5898 expand_expr (next_o_limit, NULL_RTX,
5899 Pmode, EXPAND_NORMAL),
5900 GT, const1_rtx, Pmode, 1, lab_false);
5902 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5903 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5905 emit_move_insn (addr_rtx, r);
5907 emit_jump_insn (gen_jump (lab_over));
5909 emit_label (lab_false);
5911 if (size > 4 && ! TARGET_SH4)
5913 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5914 TREE_SIDE_EFFECTS (tmp) = 1;
5915 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5918 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5919 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5921 emit_move_insn (addr_rtx, r);
5924 emit_label (lab_over);
5926 tmp = make_tree (pptr_type_node, addr_rtx);
5927 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5930 /* ??? In va-sh.h, there had been code to make values larger than
5931 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5933 result = std_expand_builtin_va_arg (valist, type);
5936 #ifdef POINTERS_EXTEND_UNSIGNED
5937 if (GET_MODE (addr) != Pmode)
5938 addr = convert_memory_address (Pmode, result);
5940 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5941 set_mem_alias_set (result, get_varargs_alias_set ());
5943 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5944 argument to the varargs alias set. */
5948 /* Define the offset between two registers, one to be eliminated, and
5949 the other its replacement, at the start of a routine. */
5952 initial_elimination_offset (from, to)
5957 int regs_saved_rounding = 0;
5958 int total_saved_regs_space;
5959 int total_auto_space;
5960 int save_flags = target_flags;
5963 HARD_REG_SET live_regs_mask;
5964 regs_saved = calc_live_regs (&live_regs_mask);
5965 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5966 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5967 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5968 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5970 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5971 copy_flags = target_flags;
5972 target_flags = save_flags;
5974 total_saved_regs_space = regs_saved + regs_saved_rounding;
5976 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5977 return total_saved_regs_space + total_auto_space
5978 + current_function_args_info.byref_regs * 8;
5980 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5981 return total_saved_regs_space + total_auto_space
5982 + current_function_args_info.byref_regs * 8;
5984 /* Initial gap between fp and sp is 0. */
5985 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5988 if (from == RETURN_ADDRESS_POINTER_REGNUM
5989 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5993 int i, n = total_saved_regs_space;
5995 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5997 n += total_auto_space;
5999 /* If it wasn't saved, there's not much we can do. */
6000 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6003 target_flags = copy_flags;
6005 /* We loop twice: first, check 8-byte aligned registers,
6006 which are stored in the higher addresses and are known
6007 to be aligned. Then, check 32-bit registers that don't
6008 need 8-byte alignment. */
6009 for (align = 1; align >= 0; align--)
6010 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6011 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6013 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6015 if (mode == SFmode && (i % 2) == 1
6016 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6017 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6023 /* If we're doing the aligned pass and this is not aligned,
6024 or we're doing the unaligned pass and this is aligned,
6026 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6030 n -= GET_MODE_SIZE (mode);
6034 target_flags = save_flags;
6042 return total_auto_space;
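/* For example (illustrative numbers): with 8 bytes of saved registers
   and a frame that rounds to 16 bytes, eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 8 + 16 == 24, plus 8 bytes per
   by-reference register on SH5.  */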
6048 /* Handle machine specific pragmas to be semi-compatible with the Hitachi compiler. */
6052 sh_pr_interrupt (pfile)
6053 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6055 pragma_interrupt = 1;
6060 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6062 pragma_interrupt = pragma_trapa = 1;
6066 sh_pr_nosave_low_regs (pfile)
6067 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6069 pragma_nosave_low_regs = 1;
6072 /* Generate the 'interrupt_handler' attribute for decls. */
6075 sh_insert_attributes (node, attributes)
6079 if (! pragma_interrupt
6080 || TREE_CODE (node) != FUNCTION_DECL)
6083 /* We are only interested in declarations. */
6084 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6087 /* Add an 'interrupt_handler' attribute. */
6088 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6093 /* Supported attributes:
6095 interrupt_handler -- specifies this function is an interrupt handler.
6097 sp_switch -- specifies an alternate stack for an interrupt handler to run on.
6100 trap_exit -- use a trapa to exit an interrupt function instead of
6101 an rte instruction. */
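/* For example (user source, illustrative):

     void isr (void) __attribute__ ((interrupt_handler,
                                     sp_switch ("alt_stack"),
                                     trap_exit (11)));

   Here "alt_stack" names a variable holding the alternate stack
   address and 11 is the trapa number used at exit.  */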
6103 const struct attribute_spec sh_attribute_table[] =
6105 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6106 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6107 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6108 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6109 { NULL, 0, 0, false, false, false, NULL }
6112 /* Handle an "interrupt_handler" attribute; arguments as in
6113 struct attribute_spec.handler. */
6115 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6118 tree args ATTRIBUTE_UNUSED;
6119 int flags ATTRIBUTE_UNUSED;
6122 if (TREE_CODE (*node) != FUNCTION_DECL)
6124 warning ("`%s' attribute only applies to functions",
6125 IDENTIFIER_POINTER (name));
6126 *no_add_attrs = true;
6128 else if (TARGET_SHCOMPACT)
6130 error ("attribute interrupt_handler is not compatible with -m5-compact");
6131 *no_add_attrs = true;
6137 /* Handle an "sp_switch" attribute; arguments as in
6138 struct attribute_spec.handler. */
6140 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6144 int flags ATTRIBUTE_UNUSED;
6147 if (TREE_CODE (*node) != FUNCTION_DECL)
6149 warning ("`%s' attribute only applies to functions",
6150 IDENTIFIER_POINTER (name));
6151 *no_add_attrs = true;
6153 else if (!pragma_interrupt)
6155 /* The sp_switch attribute only has meaning for interrupt functions. */
6156 warning ("`%s' attribute only applies to interrupt functions",
6157 IDENTIFIER_POINTER (name));
6158 *no_add_attrs = true;
6160 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6162 /* The argument must be a constant string. */
6163 warning ("`%s' attribute argument not a string constant",
6164 IDENTIFIER_POINTER (name));
6165 *no_add_attrs = true;
6169 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6170 TREE_STRING_POINTER (TREE_VALUE (args)));
6176 /* Handle a "trap_exit" attribute; arguments as in
6177 struct attribute_spec.handler. */
6179 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6183 int flags ATTRIBUTE_UNUSED;
6186 if (TREE_CODE (*node) != FUNCTION_DECL)
6188 warning ("`%s' attribute only applies to functions",
6189 IDENTIFIER_POINTER (name));
6190 *no_add_attrs = true;
6192 else if (!pragma_interrupt)
6194 /* The trap_exit attribute only has meaning for interrupt functions. */
6195 warning ("`%s' attribute only applies to interrupt functions",
6196 IDENTIFIER_POINTER (name));
6197 *no_add_attrs = true;
6199 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6201 /* The argument must be a constant integer. */
6202 warning ("`%s' attribute argument not an integer constant",
6203 IDENTIFIER_POINTER (name));
6204 *no_add_attrs = true;
6208 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6215 sh_cfun_interrupt_handler_p ()
6217 return (lookup_attribute ("interrupt_handler",
6218 DECL_ATTRIBUTES (current_function_decl))
6222 /* Predicates used by the templates. */
6224 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6225 Used only in general_movsrc_operand. */
6228 system_reg_operand (op, mode)
6230 enum machine_mode mode ATTRIBUTE_UNUSED;
6242 /* Returns 1 if OP can be source of a simple move operation.
6243 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6244 invalid as are subregs of system registers. */
6247 general_movsrc_operand (op, mode)
6249 enum machine_mode mode;
6251 if (GET_CODE (op) == MEM)
6253 rtx inside = XEXP (op, 0);
6254 if (GET_CODE (inside) == CONST)
6255 inside = XEXP (inside, 0);
6257 if (GET_CODE (inside) == LABEL_REF)
6260 if (GET_CODE (inside) == PLUS
6261 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6262 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6265 /* Only post inc allowed. */
6266 if (GET_CODE (inside) == PRE_DEC)
6270 if ((mode == QImode || mode == HImode)
6271 && (GET_CODE (op) == SUBREG
6272 && GET_CODE (XEXP (op, 0)) == REG
6273 && system_reg_operand (XEXP (op, 0), mode)))
6276 return general_operand (op, mode);
6279 /* Returns 1 if OP can be a destination of a move.
6280 Same as general_operand, but no preinc allowed. */
6283 general_movdst_operand (op, mode)
6285 enum machine_mode mode;
6287 /* Only pre dec allowed. */
6288 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6291 return general_operand (op, mode);
6294 /* Returns 1 if OP is a normal arithmetic register. */
6297 arith_reg_operand (op, mode)
6299 enum machine_mode mode;
6301 if (register_operand (op, mode))
6305 if (GET_CODE (op) == REG)
6307 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6308 regno = REGNO (SUBREG_REG (op));
6312 return (regno != T_REG && regno != PR_REG
6313 && ! TARGET_REGISTER_P (regno)
6314 && (regno != FPUL_REG || TARGET_SH4)
6315 && regno != MACH_REG && regno != MACL_REG);
6320 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6321 because this would lead to missing sign extensions when truncating from
6322 DImode to SImode. */
6324 arith_reg_dest (op, mode)
6326 enum machine_mode mode;
6328 if (mode == DImode && GET_CODE (op) == SUBREG
6329 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6331 return arith_reg_operand (op, mode);
6335 int_gpr_dest (op, mode)
6337 enum machine_mode mode ATTRIBUTE_UNUSED;
6339 enum machine_mode op_mode = GET_MODE (op);
6341 if (GET_MODE_CLASS (op_mode) != MODE_INT
6342 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6344 if (! reload_completed)
6346 return true_regnum (op) <= LAST_GENERAL_REG;
6350 fp_arith_reg_operand (op, mode)
6352 enum machine_mode mode;
6354 if (register_operand (op, mode))
6358 if (GET_CODE (op) == REG)
6360 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6361 regno = REGNO (SUBREG_REG (op));
6365 return (regno >= FIRST_PSEUDO_REGISTER
6366 || FP_REGISTER_P (regno));
6371 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6374 arith_operand (op, mode)
6376 enum machine_mode mode;
6378 if (arith_reg_operand (op, mode))
6383 /* FIXME: We should be checking whether the CONST_INT fits in a
6384 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6385 attempting to transform a sequence of two 64-bit sets of the
6386 same register from literal constants into a set and an add,
6387 when the difference is too wide for an add. */
6388 if (GET_CODE (op) == CONST_INT
6389 || EXTRA_CONSTRAINT_S (op))
6394 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6400 /* Returns 1 if OP is a valid source operand for a compare insn. */
6403 arith_reg_or_0_operand (op, mode)
6405 enum machine_mode mode;
6407 if (arith_reg_operand (op, mode))
6410 if (EXTRA_CONSTRAINT_U (op))
6416 /* Return 1 if OP is a valid source operand for an SHmedia operation
6417 that takes either a register or a 6-bit immediate. */
6420 shmedia_6bit_operand (op, mode)
6422 enum machine_mode mode;
6424 return (arith_reg_operand (op, mode)
6425 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6428 /* Returns 1 if OP is a valid source operand for a logical operation. */
6431 logical_operand (op, mode)
6433 enum machine_mode mode;
6435 if (arith_reg_operand (op, mode))
6440 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6445 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
6452 and_operand (op, mode)
6454 enum machine_mode mode;
6456 if (logical_operand (op, mode))
6459 /* Check mshflo.l / mshflhi.l opportunities. */
6462 && GET_CODE (op) == CONST_INT
6463 && (INTVAL (op) == (unsigned) 0xffffffff
6464 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6470 /* Nonzero if OP is a floating point value with value 0.0. */
6473 fp_zero_operand (op)
6478 if (GET_MODE (op) != SFmode)
6481 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6482 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6485 /* Nonzero if OP is a floating point value with value 1.0. */
6493 if (GET_MODE (op) != SFmode)
6496 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6497 return REAL_VALUES_EQUAL (r, dconst1);
6500 /* For -m4 and -m4-single-only, mode switching is used. If we are
6501 compiling without -mfmovd, movsf_ie isn't taken into account for
6502 mode switching. We could check in machine_dependent_reorg for
6503 cases where we know we are in single precision mode, but there is
6504 no interface to find that out during reload, so we must avoid
6505 choosing an fldi alternative during reload and thus failing to
6506 allocate a scratch register for the constant loading. */
6510 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6514 tertiary_reload_operand (op, mode)
6516 enum machine_mode mode ATTRIBUTE_UNUSED;
6518 enum rtx_code code = GET_CODE (op);
6519 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6523 fpscr_operand (op, mode)
6525 enum machine_mode mode ATTRIBUTE_UNUSED;
6527 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6528 && GET_MODE (op) == PSImode);
6532 fpul_operand (op, mode)
6534 enum machine_mode mode;
6537 return fp_arith_reg_operand (op, mode);
6539 return (GET_CODE (op) == REG
6540 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6541 && GET_MODE (op) == mode);
6545 symbol_ref_operand (op, mode)
6547 enum machine_mode mode ATTRIBUTE_UNUSED;
6549 return (GET_CODE (op) == SYMBOL_REF);
6552 /* Return the TLS type for TLS symbols, 0 for otherwise. */
6554 tls_symbolic_operand (op, mode)
6556 enum machine_mode mode ATTRIBUTE_UNUSED;
6560 if (GET_CODE (op) != SYMBOL_REF)
6564 STRIP_DATALABEL_ENCODING(str, str);
6565 if (! TLS_SYMNAME_P (str))
6571 return TLS_MODEL_GLOBAL_DYNAMIC;
6573 return TLS_MODEL_LOCAL_DYNAMIC;
6575 return TLS_MODEL_INITIAL_EXEC;
6577 return TLS_MODEL_LOCAL_EXEC;
6583 commutative_float_operator (op, mode)
6585 enum machine_mode mode;
6587 if (GET_MODE (op) != mode)
6589 switch (GET_CODE (op))
6601 noncommutative_float_operator (op, mode)
6603 enum machine_mode mode;
6605 if (GET_MODE (op) != mode)
6607 switch (GET_CODE (op))
6619 unary_float_operator (op, mode)
6621 enum machine_mode mode;
6623 if (GET_MODE (op) != mode)
6625 switch (GET_CODE (op))
6638 binary_float_operator (op, mode)
6640 enum machine_mode mode;
6642 if (GET_MODE (op) != mode)
6644 switch (GET_CODE (op))
6658 binary_logical_operator (op, mode)
6660 enum machine_mode mode;
6662 if (GET_MODE (op) != mode)
6664 switch (GET_CODE (op))
6677 equality_comparison_operator (op, mode)
6679 enum machine_mode mode;
6681 return ((mode == VOIDmode || GET_MODE (op) == mode)
6682 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6685 int greater_comparison_operator (op, mode)
6687 enum machine_mode mode;
6689 if (mode != VOIDmode && GET_MODE (op) == mode)
6691 switch (GET_CODE (op))
6703 int less_comparison_operator (op, mode)
6705 enum machine_mode mode;
6707 if (mode != VOIDmode && GET_MODE (op) == mode)
6709 switch (GET_CODE (op))
6721 /* Accept pseudos and branch target registers. */
6723 target_reg_operand (op, mode)
6725 enum machine_mode mode;
6728 || GET_MODE (op) != DImode)
6731 if (GET_CODE (op) == SUBREG)
6734 if (GET_CODE (op) != REG)
6737 /* We must protect ourselves from matching pseudos that are virtual
6738 registers, because they will eventually be replaced with hardware
6739 registers that aren't branch-target registers. */
6740 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6741 || TARGET_REGISTER_P (REGNO (op)))
6747 /* Same as target_reg_operand, except that label_refs and symbol_refs
6748 are accepted before reload. */
6750 target_operand (op, mode)
6752 enum machine_mode mode;
6757 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6758 && EXTRA_CONSTRAINT_T (op))
6759 return ! reload_completed;
6761 return target_reg_operand (op, mode);
6765 mextr_bit_offset (op, mode)
6767 enum machine_mode mode ATTRIBUTE_UNUSED;
6771 if (GET_CODE (op) != CONST_INT)
6774 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6778 extend_reg_operand (op, mode)
6780 enum machine_mode mode;
6782 return (GET_CODE (op) == TRUNCATE
6784 : arith_reg_operand) (op, mode);
6788 trunc_hi_operand (op, mode)
6790 enum machine_mode mode;
6792 enum machine_mode op_mode = GET_MODE (op);
6794 if (op_mode != SImode && op_mode != DImode
6795 && op_mode != V4HImode && op_mode != V2SImode)
6797 return extend_reg_operand (op, mode);
6801 extend_reg_or_0_operand (op, mode)
6803 enum machine_mode mode;
6805 return (GET_CODE (op) == TRUNCATE
6807 : arith_reg_or_0_operand) (op, mode);
6811 general_extend_operand (op, mode)
6813 enum machine_mode mode;
6815 return (GET_CODE (op) == TRUNCATE
6817 : nonimmediate_operand) (op, mode);
6821 inqhi_operand (op, mode)
6823 enum machine_mode mode;
6825 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6828 /* Can't use true_regnum here because copy_cost wants to know about
6829 SECONDARY_INPUT_RELOAD_CLASS. */
6830 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6834 sh_rep_vec (v, mode)
6836 enum machine_mode mode;
6841 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6842 || (GET_MODE (v) != mode && mode != VOIDmode))
6844 i = XVECLEN (v, 0) - 2;
6845 x = XVECEXP (v, 0, i + 1);
6846 if (GET_MODE_UNIT_SIZE (mode) == 1)
6848 y = XVECEXP (v, 0, i);
6849 for (i -= 2 ; i >= 0; i -= 2)
6850 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6851 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6856 if (XVECEXP (v, 0, i) != x)
6861 /* Determine if V is a constant vector matching MODE with only one element
6862 that is not a sign extension. Two byte-sized elements count as one. */
6864 sh_1el_vec (v, mode)
6866 enum machine_mode mode;
6869 int i, last, least, sign_ix;
6872 if (GET_CODE (v) != CONST_VECTOR
6873 || (GET_MODE (v) != mode && mode != VOIDmode))
6875 /* Determine the numbers of the last and the least significant elements. */
6876 last = XVECLEN (v, 0) - 1;
6877 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6878 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6881 if (GET_MODE_UNIT_SIZE (mode) == 1)
6882 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6883 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6885 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6886 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6887 ? constm1_rtx : const0_rtx);
6888 i = XVECLEN (v, 0) - 1;
6890 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6897 sh_const_vec (v, mode)
6899 enum machine_mode mode;
6903 if (GET_CODE (v) != CONST_VECTOR
6904 || (GET_MODE (v) != mode && mode != VOIDmode))
6906 i = XVECLEN (v, 0) - 1;
6908 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6913 /* Return the destination address of a branch. */
6916 branch_dest (branch)
6919 rtx dest = SET_SRC (PATTERN (branch));
6922 if (GET_CODE (dest) == IF_THEN_ELSE)
6923 dest = XEXP (dest, 1);
6924 dest = XEXP (dest, 0);
6925 dest_uid = INSN_UID (dest);
6926 return INSN_ADDRESSES (dest_uid);
6929 /* Return nonzero if REG is not used after INSN.
6930 We assume REG is a reload reg, and therefore does
6931 not live past labels. It may live past calls or jumps though. */
6933 reg_unused_after (reg, insn)
6940 /* If the reg is set by this instruction, then it is safe for our
6941 case. Disregard the case where this is a store to memory, since
6942 we are checking a register used in the store address. */
6943 set = single_set (insn);
6944 if (set && GET_CODE (SET_DEST (set)) != MEM
6945 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6948 while ((insn = NEXT_INSN (insn)))
6950 code = GET_CODE (insn);
6953 /* If this is a label that existed before reload, then the register
6954 is dead here. However, if this is a label added by reorg, then
6955 the register may still be live here. We can't tell the difference,
6956 so we just ignore labels completely. */
6957 if (code == CODE_LABEL)
6962 if (code == JUMP_INSN)
6965 /* If this is a sequence, we must handle them all at once.
6966 We could have for instance a call that sets the target register,
6967 and an insn in a delay slot that uses the register. In this case,
6968 we must return 0. */
6969 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6974 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6976 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6977 rtx set = single_set (this_insn);
6979 if (GET_CODE (this_insn) == CALL_INSN)
6981 else if (GET_CODE (this_insn) == JUMP_INSN)
6983 if (INSN_ANNULLED_BRANCH_P (this_insn))
6988 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6990 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6992 if (GET_CODE (SET_DEST (set)) != MEM)
6998 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7003 else if (code == JUMP_INSN)
7006 else if (GET_RTX_CLASS (code) == 'i')
7008 rtx set = single_set (insn);
7010 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7012 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7013 return GET_CODE (SET_DEST (set)) != MEM;
7014 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7018 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7026 static GTY(()) rtx fpscr_rtx;
7032 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7033 REG_USERVAR_P (fpscr_rtx) = 1;
7034 mark_user_reg (fpscr_rtx);
7036 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7037 mark_user_reg (fpscr_rtx);
7056 expand_sf_unop (fun, operands)
7057 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7060 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7064 expand_sf_binop (fun, operands)
7065 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7068 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7073 expand_df_unop (fun, operands)
7074 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7077 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7081 expand_df_binop (fun, operands)
7082 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7085 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7089 /* ??? gcc does flow analysis strictly after common subexpression
7090 elimination. As a result, common subexpression elimination fails
7091 when there are some intervening statements setting the same register.
7092 If we did nothing about this, this would hurt the precision switching
7093 for SH4 badly. There is some cse after reload, but it is unable to
7094 undo the extra register pressure from the unused instructions, and
7095 it cannot remove auto-increment loads.
7097 A C code example that shows this flow/cse weakness for (at least) SH
7098 and sparc (as of gcc ss-970706) is this:
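double
f(double a)
{
  double d;
  d = 0.1;
  a += d;
  d = 1.1;
  d = 0.1;
  a *= d;
  return a;
}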
7112 So we add another pass before common subexpression elimination, to
7113 remove assignments that are dead due to a following assignment in the
7114 same basic block. */
7117 mark_use (x, reg_set_block)
7118 rtx x, *reg_set_block;
7124 code = GET_CODE (x);
7129 int regno = REGNO (x);
7130 int nregs = (regno < FIRST_PSEUDO_REGISTER
7131 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7135 reg_set_block[regno + nregs - 1] = 0;
7142 rtx dest = SET_DEST (x);
7144 if (GET_CODE (dest) == SUBREG)
7145 dest = SUBREG_REG (dest);
7146 if (GET_CODE (dest) != REG)
7147 mark_use (dest, reg_set_block);
7148 mark_use (SET_SRC (x), reg_set_block);
7155 const char *fmt = GET_RTX_FORMAT (code);
7157 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7160 mark_use (XEXP (x, i), reg_set_block);
7161 else if (fmt[i] == 'E')
7162 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7163 mark_use (XVECEXP (x, i, j), reg_set_block);
7170 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7172 /* This function returns a register to hold the address from which to
7173 load the fpscr. Currently it always returns r1 or r7, but when we are
7174 able to use pseudo registers after combine, or have a better mechanism
7175 for choosing a register, it should be done here. */
7176 /* REGS_LIVE is the liveness information for the point for which we
7177 need this allocation. In some bare-bones exit blocks, r1 is live at the
7178 start. We can even have all of r0..r3 being live:
7179 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7180 The INSN before which the new insns are placed will clobber the register
7181 we return. If a basic block consists only of setting the return value
7182 register to a pseudo and using that register, the return value is not
7183 live before or after this block, yet we'll insert our insns right in
7187 get_free_reg (regs_live)
7188 HARD_REG_SET regs_live;
7190 if (! TEST_HARD_REG_BIT (regs_live, 1))
7191 return gen_rtx_REG (Pmode, 1);
7193 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7194 there shouldn't be anything but a jump before the function end. */
7195 if (! TEST_HARD_REG_BIT (regs_live, 7))
7196 return gen_rtx_REG (Pmode, 7);
7201 /* This function will set the fpscr from memory.
7202 MODE is the mode we are setting it to. */
7204 fpscr_set_from_mem (mode, regs_live)
7206 HARD_REG_SET regs_live;
7208 enum attr_fp_mode fp_mode = mode;
7209 rtx addr_reg = get_free_reg (regs_live);
7211 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7212 emit_insn (gen_fpu_switch1 (addr_reg));
7214 emit_insn (gen_fpu_switch0 (addr_reg));
7217 /* Is the given character a logical line separator for the assembler? */
7218 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7219 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7223 sh_insn_length_adjustment (insn)
7226 /* Instructions with unfilled delay slots take up an extra two bytes for
7227 the nop in the delay slot. */
7228 if (((GET_CODE (insn) == INSN
7229 && GET_CODE (PATTERN (insn)) != USE
7230 && GET_CODE (PATTERN (insn)) != CLOBBER)
7231 || GET_CODE (insn) == CALL_INSN
7232 || (GET_CODE (insn) == JUMP_INSN
7233 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7234 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7235 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7236 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7239 /* SH2e has a bug that prevents the use of annulled branches, so if
7240 the delay slot is not filled, we'll have to put a NOP in it. */
7241 if (sh_cpu == CPU_SH2E
7242 && GET_CODE (insn) == JUMP_INSN
7243 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7244 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7245 && get_attr_type (insn) == TYPE_CBRANCH
7246 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7249 /* sh-dsp parallel processing insns take four bytes instead of two. */
7251 if (GET_CODE (insn) == INSN)
7254 rtx body = PATTERN (insn);
7255 const char *template;
7257 int maybe_label = 1;
7259 if (GET_CODE (body) == ASM_INPUT)
7260 template = XSTR (body, 0);
7261 else if (asm_noperands (body) >= 0)
7263 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7272 while (c == ' ' || c == '\t');
7273 /* all sh-dsp parallel-processing insns start with p.
7274 The only non-ppi sh insn starting with p is pref.
7275 The only ppi starting with pr is prnd. */
7276 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7278 /* The repeat pseudo-insn expands two three insns, a total of
7279 six bytes in size. */
7280 else if ((c == 'r' || c == 'R')
7281 && ! strncasecmp ("epeat", template, 5))
7283 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7285 /* If this is a label, it is obviously not a ppi insn. */
7286 if (c == ':' && maybe_label)
7291 else if (c == '\'' || c == '"')
7296 maybe_label = c != ':';
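
/* Example (illustrative only): given
     asm ("pmuls	x0,y0,a0");
   the scan above sees a leading 'p' that is not followed by "re",
   classifies the statement as a DSP parallel-processing insn, and adds
   two bytes to its length, whereas
     asm ("pref	@r1");
   starts with "pre" and gets no adjustment.  */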
/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
   isn't protected by a PIC unspec.  */
int
nonpic_symbol_mentioned_p (x)
     rtx x;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
      || GET_CODE (x) == PC)
    return 1;

  /* We don't want to look into the possible MEM location of a
     CONST_DOUBLE, since we're not going to use it, in general.  */
  if (GET_CODE (x) == CONST_DOUBLE)
    return 0;

  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == UNSPEC_PIC
	  || XINT (x, 1) == UNSPEC_GOT
	  || XINT (x, 1) == UNSPEC_GOTOFF
	  || XINT (x, 1) == UNSPEC_GOTPLT
	  || XINT (x, 1) == UNSPEC_GOTTPOFF
	  || XINT (x, 1) == UNSPEC_DTPOFF
	  || XINT (x, 1) == UNSPEC_PLT))
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
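
/* Thus a bare (symbol_ref "foo") makes this function return 1, while
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) returns 0: the
   walk never descends into one of the PIC unspecs listed above.  */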
/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
   @GOTOFF in `reg'.  */
rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (tls_symbolic_operand (orig, Pmode))
    return orig;

  if (GET_CODE (orig) == LABEL_REF
      || (GET_CODE (orig) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (orig)
	      /* SYMBOL_REF_FLAG is set on static symbols.  */
	      || SYMBOL_REF_FLAG (orig))))
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_insn (gen_symGOTOFF2reg (reg, orig));
      return reg;
    }
  else if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_insn (gen_symGOT2reg (reg, orig));
      return reg;
    }
  return orig;
}
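
/* That is: labels, static symbols, and constant pool entries are
   addressed relative to the GOT base with @GOTOFF, while other
   (possibly preemptible) symbols are loaded from their @GOT slot.  */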
/* Mark the use of a constant in the literal table.  If the constant
   has multiple labels, make it unique.  */
static rtx
mark_constant_pool_use (x)
     rtx x;
{
  rtx insn, lab, pattern;

  if (x == NULL)
    return x;

  switch (GET_CODE (x))
    {
    case LABEL_REF:
      x = XEXP (x, 0);
    case CODE_LABEL:
      break;
    default:
      return x;
    }

  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
  lab = x;
  for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != CODE_LABEL
	  || LABEL_REFS (insn) != NEXT_INSN (insn))
	break;
      lab = insn;
    }

  for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
    INSN_DELETED_P (insn) = 1;

  /* Mark constants in a window.  */
  for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) != INSN)
	continue;

      pattern = PATTERN (insn);
      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
	continue;

      switch (XINT (pattern, 1))
	{
	case UNSPECV_CONST2:
	case UNSPECV_CONST4:
	case UNSPECV_CONST8:
	  XVECEXP (pattern, 0, 1) = const1_rtx;
	  break;
	case UNSPECV_WINDOW_END:
	  if (XVECEXP (pattern, 0, 0) == x)
	    return lab;
	  break;
	case UNSPECV_CONST_END:
	  return lab;
	default:
	  break;
	}
    }

  return lab;
}
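
/* For example, when two moves reference the same pool constant through
   different labels, the labels after the first are marked deleted and
   the first label is returned; setting operand 1 of the UNSPECV_CONST
   markers to const1_rtx records that the constant in this window really
   is used, so it will be output.  */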
/* Return true if it's possible to redirect BRANCH1 to the destination
   of an unconditional jump BRANCH2.  We only want to do this if the
   resulting branch will have a short displacement.  */
int
sh_can_redirect_branch (branch1, branch2)
     rtx branch1;
     rtx branch2;
{
  if (flag_expensive_optimizations && simplejump_p (branch2))
    {
      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
      rtx insn;
      int distance;

      /* Look for the destination within 256 bytes backwards...  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = PREV_INSN (insn))
	{
	  if (insn == dest)
	    return 1;
	  else
	    distance += get_attr_length (insn);
	}
      /* ... and forwards.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = NEXT_INSN (insn))
	{
	  if (insn == dest)
	    return 1;
	  else
	    distance += get_attr_length (insn);
	}
    }
  return 0;
}
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
int
sh_hard_regno_rename_ok (old_reg, new_reg)
     unsigned int old_reg ATTRIBUTE_UNUSED;
     unsigned int new_reg;
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */
  if (sh_cfun_interrupt_handler_p () && ! regs_ever_live[new_reg])
    return 0;

  return 1;
}
/* Function to update the integer COST
   based on the relationship between INSN that is dependent on
   DEP_INSN through the dependence LINK.  The default is to make no
   adjustment to COST.  This can be used for example to specify to
   the scheduler that an output- or anti-dependence does not incur
   the same cost as a data-dependence.  The return value should be
   the new value for COST.  */
static int
sh_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link ATTRIBUTE_UNUSED;
     rtx dep_insn;
     int cost;
{
  rtx reg, use_pat;

  if (TARGET_SHMEDIA)
    {
      /* On SHmedia, if the dependence is an anti-dependence or
	 output-dependence, there is no cost.  */
      if (REG_NOTE_KIND (link) != 0)
	cost = 0;

      if (get_attr_is_mac_media (insn)
	  && get_attr_is_mac_media (dep_insn))
	cost = 1;
    }
  else if (REG_NOTE_KIND (link) == 0)
    {
      enum attr_type dep_type, type;

      if (recog_memoized (insn) < 0
	  || recog_memoized (dep_insn) < 0)
	return cost;

      dep_type = get_attr_type (dep_insn);
      if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
	cost--;
      if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
	  && (type = get_attr_type (insn)) != TYPE_CALL
	  && type != TYPE_SFUNC)
	cost--;

      /* The only input for a call that is timing-critical is the
	 function's address.  */
      if (GET_CODE (insn) == CALL_INSN)
	{
	  rtx call = PATTERN (insn);

	  if (GET_CODE (call) == PARALLEL)
	    call = XVECEXP (call, 0, 0);
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);
	  if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
	      && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
	    cost = 0;
	}
      /* Likewise, the most timing critical input for an sfunc call
	 is the function address.  However, sfuncs typically start
	 using their arguments pretty quickly.
	 Assume a four cycle delay before they are needed.  */
      /* All sfunc calls are parallels with at least four components.
	 Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
      else if (GET_CODE (PATTERN (insn)) == PARALLEL
	       && XVECLEN (PATTERN (insn), 0) >= 4
	       && (reg = sfunc_uses_reg (insn)))
	{
	  if (! reg_set_p (reg, dep_insn))
	    cost -= 4;
	}
      /* When the preceding instruction loads the shift amount of
	 the following SHAD/SHLD, the latency of the load is increased
	 by 1 cycle.  */
      else if (TARGET_SH4
	       && get_attr_type (insn) == TYPE_DYN_SHIFT
	       && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
	       && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
					   XEXP (SET_SRC (single_set (insn)),
						 1)))
	cost++;
      /* When an LS group instruction with a latency of less than
	 3 cycles is followed by a double-precision floating-point
	 instruction, FIPR, or FTRV, the latency of the first
	 instruction is increased to 3 cycles.  */
      else if (cost < 3
	       && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
	       && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	cost = 3;
      /* The lsw register of a double-precision computation is ready one
	 cycle earlier.  */
      else if (reload_completed
	       && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
	       && (use_pat = single_set (insn))
	       && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
				  SET_SRC (use_pat)))
	cost -= 1;

      if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
	  && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
	cost -= 1;
    }
  /* An anti-dependence penalty of two applies if the first insn is a double
     precision fadd / fsub / fmul.  */
  else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
	   && recog_memoized (dep_insn) >= 0
	   && get_attr_type (dep_insn) == TYPE_DFP_ARITH
	   /* A lot of alleged anti-flow dependences are fake,
	      so check this one is real.  */
	   && flow_dependent_p (dep_insn, insn))
    cost = 2;

  return cost;
}
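
/* Two examples of the above in action: a single-precision load (fload)
   feeding a dependent insn has its cost reduced by one cycle, while an
   LS group insn feeding a double-precision FP computation on SH4 has
   its latency stretched to the full three cycles.  */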
/* Check if INSN is flow-dependent on DEP_INSN.  Can also be used to check
   if DEP_INSN is anti-flow dependent on INSN.  */
static int
flow_dependent_p (insn, dep_insn)
     rtx insn, dep_insn;
{
  rtx tmp = PATTERN (insn);

  note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
  return tmp == NULL_RTX;
}
/* A helper function for flow_dependent_p called through note_stores.  */
static void
flow_dependent_p_1 (x, pat, data)
     rtx x;
     rtx pat ATTRIBUTE_UNUSED;
     void *data;
{
  rtx * pinsn = (rtx *) data;

  if (*pinsn && reg_referenced_p (x, *pinsn))
    *pinsn = NULL_RTX;
}
/* For use by ALLOCATE_INITIAL_VALUE.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */
int
sh_pr_n_sets ()
{
  return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
}
/* This function returns nonzero if the DFA based scheduler interface
   is to be used.  At present this is supported for the SH4 only.  */
static int
sh_use_dfa_interface ()
{
  if (TARGET_HARD_SH4)
    return 1;
  else
    return 0;
}
/* This function returns "2" to indicate dual issue for the SH4
   processor.  To be used by the DFA pipeline description.  */
static int
sh_issue_rate ()
{
  if (TARGET_SUPERSCALAR)
    return 2;
  else
    return 1;
}
/* SHmedia requires registers for branches, so we can't generate new
   branches past reload.  */
static bool
sh_cannot_modify_jumps_p ()
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}
static bool
sh_ms_bitfield_layout_p (record_type)
     tree record_type ATTRIBUTE_UNUSED;
{
  return TARGET_SH5;
}
/* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
   may access it using GOTOFF instead of GOT.  */
static void
sh_encode_section_info (decl, first)
     tree decl;
     int first;
{
  rtx rtl, symbol;

  if (DECL_P (decl))
    rtl = DECL_RTL (decl);
  else
    rtl = TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str, *orig_str;
      bool is_local;
      enum tls_model kind;
      char encoding;
      char *newstr;
      size_t len, dlen;

      orig_str = XSTR (symbol, 0);
      is_local = (*targetm.binds_local_p) (decl);

      if (! flag_pic)
	{
	  if (is_local)
	    kind = TLS_MODEL_LOCAL_EXEC;
	  else
	    kind = TLS_MODEL_INITIAL_EXEC;
	}
      else if (is_local)
	kind = TLS_MODEL_LOCAL_DYNAMIC;
      else
	kind = TLS_MODEL_GLOBAL_DYNAMIC;
      if (kind < flag_tls_default)
	kind = flag_tls_default;

      STRIP_DATALABEL_ENCODING (symbol_str, orig_str);
      dlen = symbol_str - orig_str;

      encoding = " GLil"[kind];
      if (TLS_SYMNAME_P (symbol_str))
	{
	  if (encoding == symbol_str[1])
	    return;
	  /* Handle the changes from initial-exec to local-exec and
	     from global-dynamic to local-dynamic.  */
	  if ((encoding == 'l' && symbol_str[1] == 'i')
	      || (encoding == 'L' && symbol_str[1] == 'G'))
	    symbol_str += 2;
	  else
	    abort ();
	}

      len = strlen (symbol_str);
      newstr = alloca (dlen + len + 3);
      if (dlen)
	memcpy (newstr, orig_str, dlen);
      newstr[dlen + 0] = SH_TLS_ENCODING[0];
      newstr[dlen + 1] = encoding;
      memcpy (newstr + dlen + 2, symbol_str, len + 1);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, dlen + len + 2);
    }

  if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
    XEXP (rtl, 0) = gen_datalabel_ref (symbol);
}
/* Undo the effects of the above.  */
static const char *
sh_strip_name_encoding (str)
     const char *str;
{
  STRIP_DATALABEL_ENCODING (str, str);
  STRIP_TLS_ENCODING (str, str);
  return str;
}
/*
   On the SH1..SH4, the trampoline looks like
   2 0002 D202		mov.l	l2,r2
   1 0000 D301		mov.l	l1,r3
   3 0004 422B		jmp	@r2
   4 0006 0009		nop
   5 0008 00000000 l1:	.long	area
   6 000c 00000000 l2:	.long	function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */
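
/* Background (illustrative): a trampoline is the small code block built
   at run time when the address of a nested function is taken, e.g.

     int f (int x)
     {
       int g (int y) { return x + y; }
       return call_through_pointer (g);
     }

   Taking g's address really yields the trampoline, which loads f's frame
   address (the static chain) into r3 and jumps to g's code through r2.  */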
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
sh_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (TARGET_SHMEDIA64)
    {
      rtx tramp_templ;
      int fixed_len;

      rtx movi1 = GEN_INT (0xcc000010);
      rtx shori1 = GEN_INT (0xc8000010);
      rtx src, dst;

      /* The following trampoline works within a +- 128 KB range for cxt:
	 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
	 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
	 gettr tr1,r1; blink tr0,r63  */
      /* Address rounding makes it hard to compute the exact bounds of the
	 offset for this trampoline, but we have a rather generous offset
	 range, so frame_offset should do fine as an upper bound.  */
      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	{
	  /* ??? could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
	  rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
	  rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
	  insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  /* Or in ptb/u .,tr1 pattern */
	  insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
	  insn = force_operand (insn, NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
			  insn);
	  insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
			  insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
			  GEN_INT (0x6bf10600));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
			  GEN_INT (0x4415fc10));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
			  GEN_INT (0x4401fff0));
	  emit_insn (gen_ic_invalidate_line (tramp));
	  return;
	}
      tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);

      tramp_templ = gen_datalabel_ref (tramp_templ);
      dst = gen_rtx_MEM (BLKmode, tramp);
      src = gen_rtx_MEM (BLKmode, tramp_templ);
      set_mem_align (dst, 256);
      set_mem_align (src, 64);
      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);

      emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
		      fnaddr);
      emit_move_insn (gen_rtx_MEM (Pmode,
				   plus_constant (tramp,
						  fixed_len
						  + GET_MODE_SIZE (Pmode))),
		      cxt);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHMEDIA)
    {
      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
	 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010  concatenated,
	 rotated 10 right, and higher 16 bit of every 32 selected.  */
      rtx movishori
	= force_reg (V2HImode, (simplify_gen_subreg
				(V2HImode, GEN_INT (0x4330432), SImode, 0)));
      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));

      tramp = force_reg (Pmode, tramp);
      fnaddr = force_reg (SImode, fnaddr);
      cxt = force_reg (SImode, cxt);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
      emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
				 gen_rtx_SUBREG (V2HImode, cxt, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
      if (TARGET_LITTLE_ENDIAN)
	{
	  emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
	  emit_insn (gen_mextr4 (quad2, cxtload, blink));
	}
      else
	{
	  emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
	  emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
	}
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHCOMPACT)
    {
      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
      return;
    }
  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  cxt);
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  fnaddr);
  if (TARGET_HARVARD)
    {
      if (TARGET_USERMODE)
	emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
			   0, VOIDmode, 1, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}
/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
static bool
sh_function_ok_for_sibcall (decl, exp)
     tree decl;
     tree exp ATTRIBUTE_UNUSED;
{
  return (decl
	  && (! TARGET_SHCOMPACT
	      || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ());
}
/* Machine specific built-in functions.  */

struct builtin_description
{
  const enum insn_code icode;
  const char *const name;
  int signature;
};
/* describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 8 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 8 },
#define SH_BLTIN_STUA_L 13
  { 0, 8, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 8, 1 },
#define SH_BLTIN_UDI 15
  { 0, 8, 1 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 16
#define SH_BLTIN_2 16
#define SH_BLTIN_SU 16
  { 1, 2 },
#define SH_BLTIN_3 17
#define SH_BLTIN_SUS 17
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 18
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 19
#define SH_BLTIN_UUUU 19
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 20
  { 0, 8 },
};
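
/* Reading signature_args: e.g. a row { 2, 2, 1 } describes a builtin
   returning a signed value and taking a signed first and an unsigned
   second argument; a leading 0 means a void result, and 8 marks a
   pointer operand that must come in Pmode.  */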
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2,	"__builtin_absv2si2",	SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2,	"__builtin_absv4hi2",	SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3,	"__builtin_addv2si3",	SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3,	"__builtin_addv4hi3",	SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3",	SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3",	SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3",	SH_BLTIN_V4HI3 },

  { CODE_FOR_alloco32,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_alloco64,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV },

  { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv,	"__builtin_sh_media_MCMV",	SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw,	"__builtin_sh_media_MCNVS_LW",	SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb,	"__builtin_sh_media_MCNVS_WB",	SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub,	"__builtin_sh_media_MCNVS_WUB",	SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1,	"__builtin_sh_media_MEXTR1",	SH_BLTIN_UDI },
  { CODE_FOR_mextr2,	"__builtin_sh_media_MEXTR2",	SH_BLTIN_UDI },
  { CODE_FOR_mextr3,	"__builtin_sh_media_MEXTR3",	SH_BLTIN_UDI },
  { CODE_FOR_mextr4,	"__builtin_sh_media_MEXTR4",	SH_BLTIN_UDI },
  { CODE_FOR_mextr5,	"__builtin_sh_media_MEXTR5",	SH_BLTIN_UDI },
  { CODE_FOR_mextr6,	"__builtin_sh_media_MEXTR6",	SH_BLTIN_UDI },
  { CODE_FOR_mextr7,	"__builtin_sh_media_MEXTR7",	SH_BLTIN_UDI },
  { CODE_FOR_mmacfx_wl,	"__builtin_sh_media_MMACFX_WL",	SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL",SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3,	"__builtin_mulv2si3",	SH_BLTIN_V2SI3 },
  { CODE_FOR_mulv4hi3,	"__builtin_mulv4hi3",	SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l,	"__builtin_sh_media_MMULFX_L",	SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w,	"__builtin_sh_media_MMULFX_W",	SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W",SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl,	"__builtin_sh_media_MMULHI_WL",	SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl,	"__builtin_sh_media_MMULLO_WL",	SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ",SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w,	"__builtin_sh_media_MPERM_W",	SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq,	"__builtin_sh_media_MSAD_UBQ",	SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l,	"__builtin_sh_media_MSHALDS_L",	SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w,	"__builtin_sh_media_MSHALDS_W",	SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3,	"__builtin_ashrv2si3",	SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3,	"__builtin_ashrv4hi3",	SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q,	"__builtin_sh_media_MSHARDS_Q",	SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b,	"__builtin_sh_media_MSHFHI_B",	SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l,	"__builtin_sh_media_MSHFHI_L",	SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w,	"__builtin_sh_media_MSHFHI_W",	SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b,	"__builtin_sh_media_MSHFLO_B",	SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l,	"__builtin_sh_media_MSHFLO_L",	SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w,	"__builtin_sh_media_MSHFLO_W",	SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3,	"__builtin_ashlv2si3",	SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3,	"__builtin_ashlv4hi3",	SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3,	"__builtin_lshrv2si3",	SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3,	"__builtin_lshrv4hi3",	SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3,	"__builtin_subv2si3",	SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3,	"__builtin_subv4hi3",	SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3",	SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3",	SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3",	SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s,	"__builtin_sh_media_FCOSA_S",	SH_BLTIN_SISF },
  { CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S",	SH_BLTIN_SISF },
  { CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S",	SH_BLTIN_3 },
  { CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S",	SH_BLTIN_3 },
  { CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S",	SH_BLTIN_2 },

  { CODE_FOR_ldhi_l,	"__builtin_sh_media_LDHI_L",	SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q,	"__builtin_sh_media_LDHI_Q",	SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l,	"__builtin_sh_media_LDLO_L",	SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q,	"__builtin_sh_media_LDLO_Q",	SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l,	"__builtin_sh_media_STHI_L",	SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q,	"__builtin_sh_media_STHI_Q",	SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l,	"__builtin_sh_media_STLO_L",	SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q,	"__builtin_sh_media_STLO_Q",	SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64,	"__builtin_sh_media_LDHI_L",	SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q64,	"__builtin_sh_media_LDHI_Q",	SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l64,	"__builtin_sh_media_LDLO_L",	SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q64,	"__builtin_sh_media_LDLO_Q",	SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l64,	"__builtin_sh_media_STHI_L",	SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q64,	"__builtin_sh_media_STHI_Q",	SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l64,	"__builtin_sh_media_STLO_L",	SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q64,	"__builtin_sh_media_STLO_Q",	SH_BLTIN_STUA_Q },

  { CODE_FOR_nsb,	"__builtin_sh_media_NSB",	SH_BLTIN_SU },
  { CODE_FOR_byterev,	"__builtin_sh_media_BYTEREV",	SH_BLTIN_2 },

  { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO",	SH_BLTIN_PSSV },
  { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO",	SH_BLTIN_PSSV }
};
static void
sh_media_init_builtins ()
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  const struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
    {
      tree type, arg_type;
      int signature = d->signature;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;
	  int i;

	  if (signature_args[signature][1] == 8
	      && (insn_data[d->icode].operand[has_result].mode != Pmode))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg == 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = ((*lang_hooks.types.type_for_mode)
			    (insn_data[d->icode].operand[opno].mode,
			     (arg & 1)));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			NULL, NULL_TREE);
    }
}

static void
sh_init_builtins ()
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
static rtx
sh_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat;

  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;

      if (! signature_args[signature][i])
	break;
      arg = TREE_VALUE (arglist);
      if (arg == error_mark_node)
	return const0_rtx;
      arglist = TREE_CHAIN (arglist);
      opmode = insn_data[icode].operand[nop].mode;
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR,
		      (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      abort ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
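
/* From the user's point of view (illustrative), a call such as

     long long r = __builtin_sh_media_MSHFLO_W (a, b);

   reaches this function as a CALL_EXPR; each argument is converted to
   the mode the insn's predicate demands, and the matching gen function
   from sh.md emits the machine insn directly.  */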
void
sh_expand_unop_v2sf (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

void
sh_expand_binop_v2sf (code, op0, op1, op2)
     enum rtx_code code;
     rtx op0, op1, op2;
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
    = gen_binary_sf_op;
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}
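
/* A V2SF value lives in a pair of float registers, so each vector
   operation is emitted as two SFmode operations, one per element: the
   SEL operands pick element 0 for the first insn and element 1 for the
   second.  */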
/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (from, to, class)
     enum machine_mode from, to;
     enum reg_class class;
{
  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, class);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_HI_REGS, class);
	}
    }
  return 0;
}
/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */
void
sh_mark_label (address, nuses)
     rtx address;
     int nuses;
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */
int
sh_register_move_cost (mode, srcclass, dstclass)
     enum machine_mode mode;
     enum reg_class srcclass, dstclass;
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
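
/* Worked example: a DFmode move between a general and a floating-point
   register without TARGET_FMOVD costs 12 * ((8 + 7) / 8U) = 12, three
   times the default 2 * ((8 + 3) / 4U) = 4, which steers register
   allocation away from such cross-class moves.  */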
/* Like register_operand, but take into account that SHMEDIA can use
   the constant zero like a general register.  */
int
sh_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
    return 1;
  return register_operand (op, mode);
}
static rtx emit_load_ptr PARAMS ((rtx, rtx));

/* Load a pointer-sized value from ADDR into REG, sign extending when
   the pointer mode is narrower than Pmode.  */
static rtx
emit_load_ptr (reg, addr)
     rtx reg, addr;
{
  rtx mem = gen_rtx_MEM (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}
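
/* Emit the thunk for a virtual function adjustment.  Conceptually
   (illustrative only) it performs

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     tail-call function (this, ...);

   without disturbing any of the argument registers.  */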
static void
sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
     FILE *file;
     tree thunk_fndecl ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add
    = (TARGET_SHMEDIA ? CONST_OK_FOR_J (delta) : CONST_OK_FOR_I (delta));
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  reload_completed = 1;
  no_new_pseudos = 1;
  current_function_uses_only_leaf_regs = 1;

  emit_note (NULL, NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && struct_value_rtx == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 21);
      scratch2 = gen_rtx_REG (Pmode, TR0_REG);
    }

  this_value = plus_constant (this, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (! delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (! did_load)
	emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (TARGET_SHMEDIA
	       ? CONST_OK_FOR_J (vcall_offset)
	       : CONST_OK_FOR_I (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	abort (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  emit_move_insn (scratch2, funexp);
  funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
  sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insns = get_insns ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
      life_analysis (insns, rtl_dump_file, PROP_FINAL);

      split_all_insns (1);

      schedule_insns (rtl_dump_file);
    }

  MACHINE_DEPENDENT_REORG (insns);

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, rtl_dump_file);
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1, 0);
  final_end_function ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars (0);

      /* Release all memory held by regsets now.  */
      regset_release_memory ();
    }

  reload_completed = 0;
  no_new_pseudos = 0;
}