1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
51 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Offsets of the most and least significant word within a two-word
   value; the two swap according to TARGET_LITTLE_ENDIAN.  They are added
   to a register number when printing the 'S' and 'R' operand modifiers.  */
53 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
54 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
56 /* These are some macros to abstract register modes. */
/* CONST_OK_FOR_ADD applies the P constant check on TARGET_SHMEDIA and the
   I constant check otherwise.  */
57 #define CONST_OK_FOR_ADD(size) \
58 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
/* Each GEN_* macro evaluates to the DImode generator function on
   TARGET_SHMEDIA64 and to the SImode generator function otherwise.  */
59 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
60 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
61 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
63 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
64 int current_function_interrupt;
66 /* ??? The pragma interrupt support will not work for SH3. */
67 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
68 output code for the next function appropriate for an interrupt handler. */
71 /* This is set by the trap_exit attribute for functions. It specifies
72 a trap number to be used in a trapa instruction at function exit
73 (instead of an rte instruction). */
76 /* This is used by the sp_switch attribute for functions. It specifies
77 a variable holding the address of the stack the interrupt function
78 should switch to/from at entry/exit. */
81 /* This is set by #pragma trapa, and is similar to the above, except that
82 the compiler doesn't emit code to preserve all registers. */
83 static int pragma_trapa;
85 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
86 which has a separate set of low regs for User and Supervisor modes.
87 This should only be used for the lowest level of interrupts. Higher levels
88 of interrupts must save the registers in case they themselves are
90 int pragma_nosave_low_regs;
92 /* This is used for communication between SETUP_INCOMING_VARARGS and
93 sh_expand_prologue. */
94 int current_function_anonymous_args;
96 /* Global variables for machine-dependent things. */
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
101 /* Saved operands from the last compare to use when we generate an scc
107 /* Provides the class number of the smallest class containing
110 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
112 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
149 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
153 char sh_register_names[FIRST_PSEUDO_REGISTER] \
154 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
156 char sh_additional_register_names[ADDREGNAMES_SIZE] \
157 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
158 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
160 /* Provide reg_class from a letter such as appears in the machine
161 description. *: target independently reserved letter.
162 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
164 enum reg_class reg_class_from_letter[] =
166 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
167 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
168 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
169 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
170 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
171 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
172 /* y */ FPUL_REGS, /* z */ R0_REGS
175 int assembler_dialect;
/* Forward declarations.  Parameter lists are wrapped in PARAMS.  The
   sh_* functions are the ones plugged into the target hook macros
   further down; sh_attribute_table is the only non-static entry.  */
177 static void split_branches PARAMS ((rtx));
178 static int branch_dest PARAMS ((rtx));
179 static void force_into PARAMS ((rtx, rtx));
180 static void print_slot PARAMS ((rtx));
181 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
182 static void dump_table PARAMS ((rtx));
183 static int hi_const PARAMS ((rtx));
184 static int broken_move PARAMS ((rtx));
185 static int mova_p PARAMS ((rtx));
186 static rtx find_barrier PARAMS ((int, rtx, rtx));
187 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
188 static rtx gen_block_redirect PARAMS ((rtx, int, int));
189 static void sh_reorg PARAMS ((void));
190 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
191 static rtx frame_insn PARAMS ((rtx));
192 static rtx push PARAMS ((int));
193 static void pop PARAMS ((int));
194 static void push_regs PARAMS ((HARD_REG_SET *, int));
195 static int calc_live_regs PARAMS ((HARD_REG_SET *));
196 static void mark_use PARAMS ((rtx, rtx *));
197 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
198 static rtx mark_constant_pool_use PARAMS ((rtx));
/* Attribute table and the handlers for the function attributes.  */
199 const struct attribute_spec sh_attribute_table[];
200 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
201 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
202 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
203 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
204 static void sh_insert_attributes PARAMS ((tree, tree *));
205 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
206 static int sh_use_dfa_interface PARAMS ((void));
207 static int sh_issue_rate PARAMS ((void));
208 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
210 static bool sh_cannot_modify_jumps_p PARAMS ((void));
211 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
213 static void sh_init_builtins PARAMS ((void));
214 static void sh_media_init_builtins PARAMS ((void));
215 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
216 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
217 HOST_WIDE_INT, tree));
218 static int flow_dependent_p PARAMS ((rtx, rtx));
219 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
/* Cost helpers and the cost hooks that are installed below.  */
220 static int shiftcosts PARAMS ((rtx));
221 static int andcosts PARAMS ((rtx));
222 static int addsubcosts PARAMS ((rtx));
223 static int multcosts PARAMS ((rtx));
224 static bool unspec_caller_rtx_p PARAMS ((rtx));
225 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
226 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
227 static int sh_address_cost PARAMS ((rtx));
229 /* Initialize the GCC target structure.  Each hook macro is first
   undefined and then redefined to the SH-specific value, so that the
   TARGET_INITIALIZER expansion at the end of this group picks them up. */
230 #undef TARGET_ATTRIBUTE_TABLE
231 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
233 /* The next two are used for debug info when compiling with -gdwarf. */
234 #undef TARGET_ASM_UNALIGNED_HI_OP
235 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
236 #undef TARGET_ASM_UNALIGNED_SI_OP
237 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
239 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
240 #undef TARGET_ASM_UNALIGNED_DI_OP
241 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
242 #undef TARGET_ASM_ALIGNED_DI_OP
243 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
245 #undef TARGET_ASM_FUNCTION_EPILOGUE
246 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
251 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
252 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
254 #undef TARGET_INSERT_ATTRIBUTES
255 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
257 #undef TARGET_SCHED_ADJUST_COST
258 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
260 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
261 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
263 #undef TARGET_SCHED_ISSUE_RATE
264 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
266 #undef TARGET_CANNOT_MODIFY_JUMPS_P
267 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
269 #undef TARGET_MS_BITFIELD_LAYOUT_P
270 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS sh_init_builtins
274 #undef TARGET_EXPAND_BUILTIN
275 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
277 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
278 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
280 #undef TARGET_CANNOT_COPY_INSN_P
281 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
282 #undef TARGET_RTX_COSTS
283 #define TARGET_RTX_COSTS sh_rtx_costs
284 #undef TARGET_ADDRESS_COST
285 #define TARGET_ADDRESS_COST sh_address_cost
287 #undef TARGET_MACHINE_DEPENDENT_REORG
288 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
291 #undef TARGET_HAVE_TLS
292 #define TARGET_HAVE_TLS true
/* The target vector itself, built from the hook macros above.  */
295 struct gcc_target targetm = TARGET_INITIALIZER;
297 /* Print the operand address in x to the stream.
   The rtx code of X selects the syntax that is written: "@reg",
   "@(disp,reg)", "@(r0,reg)", "@-reg", "@reg+", or a constant address. */
300 print_operand_address (stream, x)
304 switch (GET_CODE (x))
/* Register-indirect form: "@" followed by the register name.  */
308 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
/* Two-operand address: operand 0 is the base, operand 1 is either a
   constant displacement or an index register.  */
313 rtx base = XEXP (x, 0);
314 rtx index = XEXP (x, 1);
316 switch (GET_CODE (index))
/* Constant displacement, printed as a decimal int.  */
319 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
320 reg_names[true_regnum (base)]);
326 int base_num = true_regnum (base);
327 int index_num = true_regnum (index);
/* "r0" is printed literally, so only the higher-numbered of the two
   registers is named explicitly.  */
329 fprintf (stream, "@(r0,%s)",
330 reg_names[MAX (base_num, index_num)]);
/* Pre-decrement and post-increment forms.  */
342 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
346 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
/* Remaining case: pass X through mark_constant_pool_use and print the
   result as a constant address.  */
350 x = mark_constant_pool_use (x);
351 output_addr_const (stream, x);
356 /* Print operand x (an rtx) in assembler syntax to file stream
357 according to modifier code.
359 '.' print a .s if insn needs delay slot
360 ',' print LOCAL_LABEL_PREFIX
361 '@' print trap, rte or rts depending upon pragma interruptness
362 '#' output a nop if there is nothing to put in the delay slot
363 ''' print likelihood suffix (/u for unlikely).
364 'O' print a constant without the #
365 'R' print the LSW of a dp value - changes if in little endian
366 'S' print the MSW of a dp value - changes if in little endian
367 'T' print the next word of a dp value - same as 'R' in big endian mode.
368 'M' print an `x' if `m' will print `base,index'.
369 'N' print 'r63' if the operand is (const_int 0).
370 'm' print a pair `base,offset' or `base,index', for LD and ST.
371 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
372 'o' output an operator. */
375 print_operand (stream, x, code)
384 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
385 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
386 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
389 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
393 fprintf (stream, "trapa #%d", trap_exit);
394 else if (sh_cfun_interrupt_handler_p ())
395 fprintf (stream, "rte");
397 fprintf (stream, "rts");
400 /* Output a nop if there's nothing in the delay slot. */
401 if (dbr_sequence_length () == 0)
402 fprintf (stream, "\n\tnop");
/* Append "/u" when the REG_BR_PROB note on the current insn gives a
   probability below half of REG_BR_PROB_BASE.  */
406 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
408 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
409 fputs ("/u", stream);
413 x = mark_constant_pool_use (x);
414 output_addr_const (stream, x);
/* LSW and MSW are 0 or 1 depending on endianness and pick which register
   of the pair is named.  */
417 fputs (reg_names[REGNO (x) + LSW], (stream));
420 fputs (reg_names[REGNO (x) + MSW], (stream));
423 /* Next word of a double. */
424 switch (GET_CODE (x))
427 fputs (reg_names[REGNO (x) + 1], (stream));
/* For memory, step the address on by 4 bytes unless it is a
   pre-decrement or post-increment address, which is printed as is.  */
430 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
431 && GET_CODE (XEXP (x, 0)) != POST_INC)
432 x = adjust_address (x, SImode, 4);
433 print_operand_address (stream, XEXP (x, 0));
440 switch (GET_CODE (x))
/* Note that each of the pairs GT/LT, GE/LE, GTU/LTU and GEU/LEU prints a
   single shared mnemonic.  */
442 case PLUS: fputs ("add", stream); break;
443 case MINUS: fputs ("sub", stream); break;
444 case MULT: fputs ("mul", stream); break;
445 case DIV: fputs ("div", stream); break;
446 case EQ: fputs ("eq", stream); break;
447 case NE: fputs ("ne", stream); break;
448 case GT: case LT: fputs ("gt", stream); break;
449 case GE: case LE: fputs ("ge", stream); break;
450 case GTU: case LTU: fputs ("gtu", stream); break;
451 case GEU: case LEU: fputs ("geu", stream); break;
/* Test for a memory operand whose address is a PLUS with a REG or SUBREG
   as its second operand, i.e. a `base,index' address.  */
457 if (GET_CODE (x) == MEM
458 && GET_CODE (XEXP (x, 0)) == PLUS
459 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
460 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
465 if (GET_CODE (x) != MEM)
468 switch (GET_CODE (x))
/* A bare base is printed with an explicit ", 0" offset; a two-part
   address prints both parts separated by ", ".  */
472 print_operand (stream, x, 0);
473 fputs (", 0", stream);
477 print_operand (stream, XEXP (x, 0), 0);
478 fputs (", ", stream);
479 print_operand (stream, XEXP (x, 1), 0);
488 if (x == CONST0_RTX (GET_MODE (x)))
490 fprintf ((stream), "r63");
495 if (GET_CODE (x) == CONST_INT)
/* Keep only the low 16 bits and print them as an unsigned number.  */
497 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
504 switch (GET_CODE (x))
506 /* FIXME: We need this on SHmedia32 because reload generates
507 some sign-extended HI or QI loads into DImode registers
508 but, because Pmode is SImode, the address ends up with a
509 subreg:SI of the DImode register. Maybe reload should be
510 fixed so as to apply alter_subreg to such loads? */
512 if (SUBREG_BYTE (x) != 0
513 || GET_CODE (SUBREG_REG (x)) != REG)
/* Vector and wide floating-point modes are printed with their own prefix
   ("mtrx", "fv", "fp" or "d") followed by the tail of the ordinary
   register name; everything else uses the plain register name.  */
520 if (FP_REGISTER_P (REGNO (x))
521 && GET_MODE (x) == V16SFmode)
522 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
523 else if (FP_REGISTER_P (REGNO (x))
524 && GET_MODE (x) == V4SFmode)
525 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
526 else if (GET_CODE (x) == REG
527 && GET_MODE (x) == V2SFmode)
528 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
529 else if (FP_REGISTER_P (REGNO (x))
530 && GET_MODE_SIZE (GET_MODE (x)) > 4)
531 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
533 fputs (reg_names[REGNO (x)], (stream));
537 output_address (XEXP (x, 0));
542 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
543 && GET_MODE (XEXP (x, 0)) == DImode
544 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
545 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
/* VAL is the expression underneath the sign_extend:DI (truncate:HI ...)
   wrappers; it is printed followed by " & 65535)", with an optional
   " >> " shift count when it is an ASHIFTRT.  */
547 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
550 if (GET_CODE (val) == ASHIFTRT)
553 if (GET_CODE (XEXP (val, 0)) == CONST)
555 output_addr_const (stream, XEXP (val, 0));
556 if (GET_CODE (XEXP (val, 0)) == CONST)
558 fputs (" >> ", stream);
559 output_addr_const (stream, XEXP (val, 1));
564 if (GET_CODE (val) == CONST)
566 output_addr_const (stream, val);
567 if (GET_CODE (val) == CONST)
570 fputs (" & 65535)", stream);
578 output_addr_const (stream, x);
585 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
587 force_into (value, target)
590 value = force_operand (value, target);
/* force_operand may hand back an rtx other than TARGET; in that case
   emit an explicit move into TARGET.  */
591 if (! rtx_equal_p (value, target))
592 emit_insn (gen_move_insn (target, value));
595 /* Emit code to perform a block move. Choose the best method.
597 OPERANDS[0] is the destination.
598 OPERANDS[1] is the source.
599 OPERANDS[2] is the size.
600 OPERANDS[3] is the alignment safe to use. */
603 expand_block_move (operands)
606 int align = INTVAL (operands[3]);
/* BYTES is only meaningful when the size operand is a CONST_INT; it is
   left at 0 otherwise.  */
607 int constp = (GET_CODE (operands[2]) == CONST_INT);
608 int bytes = (constp ? INTVAL (operands[2]) : 0);
610 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
611 alignment, or if it isn't a multiple of 4 bytes, then fail. */
612 if (! constp || align < 4 || (bytes % 4 != 0))
619 else if (bytes == 12)
/* 12-byte copy: the destination address is forced into r4, the source
   address into r5, and the __movstrSI12_i4 routine is called through
   the block_move_real_i4 pattern.  */
624 rtx r4 = gen_rtx (REG, SImode, 4);
625 rtx r5 = gen_rtx (REG, SImode, 5);
627 entry_name = get_identifier ("__movstrSI12_i4");
629 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
630 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
631 force_into (XEXP (operands[0], 0), r4);
632 force_into (XEXP (operands[1], 0), r5);
633 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
636 else if (! TARGET_SMALLCODE)
/* Not optimizing for size: the library entry point is selected by
   (bytes & 4), and r6 is loaded with dwords - 1 before the call.
   NOTE(review): the definition of dwords is not visible here - confirm.  */
642 rtx r4 = gen_rtx (REG, SImode, 4);
643 rtx r5 = gen_rtx (REG, SImode, 5);
644 rtx r6 = gen_rtx (REG, SImode, 6);
646 entry_name = get_identifier (bytes & 4
648 : "__movstr_i4_even");
649 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
650 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
651 force_into (XEXP (operands[0], 0), r4);
652 force_into (XEXP (operands[1], 0), r5);
655 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
656 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
/* Call a size-specific routine whose name is "__movstrSI" followed by
   the byte count.  */
668 rtx r4 = gen_rtx_REG (SImode, 4);
669 rtx r5 = gen_rtx_REG (SImode, 5);
671 sprintf (entry, "__movstrSI%d", bytes);
672 entry_name = get_identifier (entry);
673 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
674 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
675 force_into (XEXP (operands[0], 0), r4);
676 force_into (XEXP (operands[1], 0), r5);
677 emit_insn (gen_block_move_real (func_addr_rtx));
681 /* This is the same number of bytes as a memcpy call, but to a different
682 less common function name, so this will occasionally use more space. */
683 if (! TARGET_SMALLCODE)
688 int final_switch, while_loop;
689 rtx r4 = gen_rtx_REG (SImode, 4);
690 rtx r5 = gen_rtx_REG (SImode, 5);
691 rtx r6 = gen_rtx_REG (SImode, 6);
693 entry_name = get_identifier ("__movstr");
694 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
695 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
696 force_into (XEXP (operands[0], 0), r4);
697 force_into (XEXP (operands[1], 0), r5);
699 /* r6 controls the size of the move. 16 is decremented from it
700 for each 64 bytes moved. Then the negative bit left over is used
701 as an index into a list of move instructions. e.g., a 72 byte move
702 would be set up with size(r6) = 14, for one iteration through the
703 big while loop, and a switch of -2 for the last part. */
/* Both terms are computed in units of 4-byte words (bytes / 4).  */
705 final_switch = 16 - ((bytes / 4) % 16);
706 while_loop = ((bytes / 4) / 16 - 1) * 16;
707 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
708 emit_insn (gen_block_lump_real (func_addr_rtx));
715 /* Prepare operands for a move define_expand; specifically, one of the
716 operands must be in a register.
   OPERANDS[0] is the destination and OPERANDS[1] the source; MODE is the
   mode of the move.  Symbolic and TLS sources are rewritten first. */
719 prepare_move_operands (operands, mode)
721 enum machine_mode mode;
/* SImode/DImode moves go through the PIC rewriting below, except for
   pointer-mode moves of TLS symbols, which are handled further down.  */
723 if ((mode == SImode || mode == DImode)
725 && ! ((mode == Pmode || mode == ptr_mode)
726 && tls_symbolic_operand (operands[1], Pmode) != 0))
729 if (SYMBOLIC_CONST_P (operands[1]))
/* A symbolic constant stored to memory is first forced into a register;
   otherwise it is rewritten by legitimize_pic_address.  */
731 if (GET_CODE (operands[0]) == MEM)
732 operands[1] = force_reg (Pmode, operands[1]);
733 else if (TARGET_SHMEDIA
734 && GET_CODE (operands[1]) == LABEL_REF
735 && target_reg_operand (operands[0], mode))
/* When new pseudos may not be created, the destination itself is used
   as the temporary.  */
739 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
740 operands[1] = legitimize_pic_address (operands[1], mode, temp);
/* (const (plus symbol offset)): legitimize the symbol part, then add the
   offset back with an explicit add.  */
743 else if (GET_CODE (operands[1]) == CONST
744 && GET_CODE (XEXP (operands[1], 0)) == PLUS
745 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
747 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
748 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
750 operands[1] = expand_binop (mode, add_optab, temp,
751 XEXP (XEXP (operands[1], 0), 1),
752 no_new_pseudos ? temp
753 : gen_reg_rtx (Pmode),
758 if (! reload_in_progress && ! reload_completed)
760 /* Copy the source to a register if both operands aren't registers. */
761 if (! register_operand (operands[0], mode)
762 && ! sh_register_operand (operands[1], mode))
763 operands[1] = copy_to_mode_reg (mode, operands[1]);
765 /* This case can happen while generating code to move the result
766 of a library call to the target. Reject `st r0,@(rX,rY)' because
767 reload will fail to find a spill register for rX, since r0 is already
768 being used for the source. */
769 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
770 && GET_CODE (operands[0]) == MEM
771 && GET_CODE (XEXP (operands[0], 0)) == PLUS
772 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
773 operands[1] = copy_to_mode_reg (mode, operands[1]);
/* Pointer-mode moves: a TLS symbol source is expanded according to the
   TLS model reported by tls_symbolic_operand.  */
776 if (mode == Pmode || mode == ptr_mode)
779 enum tls_model tls_kind;
783 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
785 rtx tga_op1, tga_ret, tmp, tmp2;
790 case TLS_MODEL_GLOBAL_DYNAMIC:
/* The tls_global_dynamic pattern is given hard register R0_REG as its
   result operand.  */
791 tga_ret = gen_rtx_REG (Pmode, R0_REG);
792 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
796 case TLS_MODEL_LOCAL_DYNAMIC:
/* As above with tls_local_dynamic; the R0_REG value is copied to a
   pseudo and then combined with the symbol by symDTPOFF2reg.  */
797 tga_ret = gen_rtx_REG (Pmode, R0_REG);
798 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
800 tmp = gen_reg_rtx (Pmode);
801 emit_move_insn (tmp, tga_ret);
803 if (register_operand (op0, Pmode))
806 tmp2 = gen_reg_rtx (Pmode);
808 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
812 case TLS_MODEL_INITIAL_EXEC:
814 emit_insn (gen_GOTaddr2picreg ());
815 tga_op1 = gen_reg_rtx (Pmode);
816 tmp = gen_sym2GOTTPOFF (op1);
817 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
821 case TLS_MODEL_LOCAL_EXEC:
/* tmp2 comes from the load_gbr pattern and tmp from symTPOFF2reg; the
   two are summed with addsi3 into op1.  */
822 tmp2 = gen_reg_rtx (Pmode);
823 emit_insn (gen_load_gbr (tmp2));
824 tmp = gen_reg_rtx (Pmode);
825 emit_insn (gen_symTPOFF2reg (tmp, op1));
826 RTX_UNCHANGING_P (tmp) = 1;
828 if (register_operand (op0, Pmode))
831 op1 = gen_reg_rtx (Pmode);
833 emit_insn (gen_addsi3 (op1, tmp, tmp2));
846 /* Prepare the operands for an scc instruction; make sure that the
847 compare has been done.
   CODE is the comparison code.  The comparison of sh_compare_op0 with
   sh_compare_op1 is emitted as a SET of the T register. */
849 prepare_scc_operands (code)
852 rtx t_reg = gen_rtx_REG (SImode, T_REG);
853 enum rtx_code oldcode = code;
854 enum machine_mode mode;
856 /* First need a compare insn. */
860 /* It isn't possible to handle this case. */
/* Exchange the two saved compare operands.  */
879 rtx tmp = sh_compare_op0;
880 sh_compare_op0 = sh_compare_op1;
881 sh_compare_op1 = tmp;
/* Take the mode from whichever operand carries one.  */
884 mode = GET_MODE (sh_compare_op0);
885 if (mode == VOIDmode)
886 mode = GET_MODE (sh_compare_op1);
/* Operand 0 always goes in a register; operand 1 only when the tests
   below show that it cannot be left as it is.  */
888 sh_compare_op0 = force_reg (mode, sh_compare_op0);
889 if ((code != EQ && code != NE
890 && (sh_compare_op1 != const0_rtx
891 || code == GTU || code == GEU || code == LTU || code == LEU))
892 || (mode == DImode && sh_compare_op1 != const0_rtx)
893 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
894 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* On SH4 a floating-point compare is wrapped in a PARALLEL together with
   a USE of the fpscr rtx and emitted through emit_sf_insn or
   emit_df_insn according to the mode.  */
896 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
897 (mode == SFmode ? emit_sf_insn : emit_df_insn)
898 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
899 gen_rtx (SET, VOIDmode, t_reg,
900 gen_rtx (code, SImode,
901 sh_compare_op0, sh_compare_op1)),
902 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
904 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
905 gen_rtx (code, SImode, sh_compare_op0,
911 /* Called from the md file, set up the operands of a compare instruction.
   CODE is the comparison code; the operands compared are the saved
   sh_compare_op0 and sh_compare_op1. */
914 from_compare (operands, code)
/* Take the mode from whichever saved operand carries one.  */
918 enum machine_mode mode = GET_MODE (sh_compare_op0);
920 if (mode == VOIDmode)
921 mode = GET_MODE (sh_compare_op1);
924 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
926 /* Force args into regs, since we can't use constants here. */
927 sh_compare_op0 = force_reg (mode, sh_compare_op0);
928 if (sh_compare_op1 != const0_rtx
929 || code == GTU || code == GEU
930 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
931 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH2E floating-point GE: recurse to emit the GT compare, then use the
   ieee_ccmpeqsf_t pattern for the equality part.  */
933 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
935 from_compare (operands, GT);
936 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
939 insn = gen_rtx_SET (VOIDmode,
940 gen_rtx_REG (SImode, T_REG),
941 gen_rtx (code, SImode, sh_compare_op0,
/* SH4 floating-point compares additionally carry a USE of the fpscr rtx
   and are emitted via emit_sf_insn or emit_df_insn.  */
943 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
945 insn = gen_rtx (PARALLEL, VOIDmode,
947 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
948 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
954 /* Functions to output assembly code. */
956 /* Return a sequence of instructions to perform DI or DF move.
958 Since the SH cannot move a DI or DF in one instruction, we have
959 to take care when we see overlapping source and dest registers.
   OPERANDS[0] is the destination and OPERANDS[1] the source.  In the
   templates, %T names the next word of an operand and %S / %R its most /
   least significant word. */
962 output_movedouble (insn, operands, mode)
963 rtx insn ATTRIBUTE_UNUSED;
965 enum machine_mode mode;
967 rtx dst = operands[0];
968 rtx src = operands[1];
/* Pre-decrement destination: the next word of the source (%T1) is stored
   first, then the first word.  */
970 if (GET_CODE (dst) == MEM
971 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
972 return "mov.l %T1,%0\n\tmov.l %1,%0";
974 if (register_operand (dst, mode)
975 && register_operand (src, mode))
/* A source of MACH_REG is read with two sts instructions, one for mach
   and one for macl.  */
977 if (REGNO (src) == MACH_REG)
978 return "sts mach,%S0\n\tsts macl,%R0";
980 /* When mov.d r1,r2 do r2->r3 then r1->r2;
981 when mov.d r1,r0 do r1->r0 then r2->r1. */
983 if (REGNO (src) + 1 == REGNO (dst))
984 return "mov %T1,%T0\n\tmov %1,%0";
986 return "mov %1,%0\n\tmov %T1,%T0";
/* Constant source: the most significant word is set to -1 or 0 according
   to the sign of the constant.  */
988 else if (GET_CODE (src) == CONST_INT)
990 if (INTVAL (src) < 0)
991 output_asm_insn ("mov #-1,%S0", operands);
993 output_asm_insn ("mov #0,%S0", operands);
997 else if (GET_CODE (src) == MEM)
/* Work out which register the source address uses (ptrreg), to compare
   it against the destination register dreg.  */
1000 int dreg = REGNO (dst);
1001 rtx inside = XEXP (src, 0);
1003 if (GET_CODE (inside) == REG)
1004 ptrreg = REGNO (inside);
1005 else if (GET_CODE (inside) == SUBREG)
1006 ptrreg = subreg_regno (inside);
1007 else if (GET_CODE (inside) == PLUS)
1009 ptrreg = REGNO (XEXP (inside, 0));
1010 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1011 an offsettable address. Unfortunately, offsettable addresses use
1012 QImode to check the offset, and a QImode offsettable address
1013 requires r0 for the other operand, which is not currently
1014 supported, so we can't use the 'o' constraint.
1015 Thus we must check for and handle r0+REG addresses here.
1016 We punt for now, since this is likely very rare. */
1017 if (GET_CODE (XEXP (inside, 1)) == REG)
/* Label and post-increment addresses have fixed two-instruction forms.  */
1020 else if (GET_CODE (inside) == LABEL_REF)
1021 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1022 else if (GET_CODE (inside) == POST_INC)
1023 return "mov.l %1,%0\n\tmov.l %1,%T0";
1027 /* Work out the safe way to copy. Copy into the second half first. */
1029 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1032 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1035 /* Print an instruction which would have gone into a delay slot after
1036 another instruction, but couldn't because the other instruction expanded
1037 into a sequence where putting the slot insn at the end wouldn't work. */
/* Element 1 of the sequence vector is the delay-slot insn; it is output
   immediately through final_scan_insn.  */
1043 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
/* Mark the slot insn as deleted - presumably so that it is not output a
   second time; confirm against final.  */
1045 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output the assembly for jump INSN when its destination needs a far
   jump sequence.  The distance to the destination selects the template:
   mov.w or mov.l followed by braf, or a mov.l load followed by jmp.  The
   address constant is emitted after the sequence, under a fresh label.
   The register from a preceding indirect_jump_scratch insn is used as
   scratch when there is one; otherwise r13 is used and preserved.  */
1049 output_far_jump (insn, op)
/* The fields of THIS are passed as an operand array to output_asm_insn,
   so their order matches the %0, %1, %2 references in the templates.  */
1053 struct { rtx lab, reg, op; } this;
1054 rtx braf_base_lab = NULL_RTX;
/* Byte distance from this insn to the branch destination.  */
1057 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1060 this.lab = gen_label_rtx ();
1064 && offset - get_attr_length (insn) <= 32766)
1067 jump = "mov.w %O0,%1; braf %1";
1075 jump = "mov.l %O0,%1; braf %1";
1077 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1080 jump = "mov.l %O0,%1; jmp @%1";
1082 /* If we have a scratch register available, use it. */
1083 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1084 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1086 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
/* If the scratch register is r0 under PIC without SH2, switch to a
   sequence that saves and restores r1 on the stack instead.  */
1087 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1088 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1089 output_asm_insn (jump, &this.lab);
1090 if (dbr_sequence_length ())
1091 print_slot (final_sequence);
1093 output_asm_insn ("nop", 0);
1097 /* Output the delay slot insn first if any. */
1098 if (dbr_sequence_length ())
1099 print_slot (final_sequence);
1101 this.reg = gen_rtx_REG (SImode, 13);
1102 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1103 Fortunately, MACL is fixed and call-clobbered, and we never
1104 need its value across jumps, so save r13 in it instead of in
1107 output_asm_insn ("lds r13, macl", 0);
1109 output_asm_insn ("mov.l r13,@-r15", 0);
1110 output_asm_insn (jump, &this.lab);
1112 output_asm_insn ("sts macl, r13", 0);
1114 output_asm_insn ("mov.l @r15+,r13", 0);
1116 if (far && flag_pic && TARGET_SH2)
1118 braf_base_lab = gen_label_rtx ();
1119 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1120 CODE_LABEL_NUMBER (braf_base_lab));
1123 output_asm_insn (".align 2", 0);
1124 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1126 if (far && flag_pic)
1129 this.lab = braf_base_lab;
1130 output_asm_insn (".long %O2-%O0", &this.lab);
1133 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1137 /* Local label counter, used for constants in the pool and inside
1138 pattern branches. */
1140 static int lf = 100;
1142 /* Output code for ordinary branches.
   LOGIC selects the sense of the branch (the short form is "bt" when it
   is nonzero, "bf" otherwise); INSN is the branch and OPERANDS[0] its
   target label.  The length attribute of INSN decides which form is
   produced. */
1145 output_branch (logic, insn, operands)
1150 switch (get_attr_length (insn))
1153 /* This can happen if filling the delay slot has caused a forward
1154 branch to exceed its range (we could reverse it, but only
1155 when we know we won't overextend other branches; this should
1156 best be handled by relaxation).
1157 It can also happen when other condbranches hoist delay slot insn
1158 from their destination, thus leading to code size increase.
1159 But the branch will still be in the range -4092..+4098 bytes. */
1164 /* The call to print_slot will clobber the operands. */
1165 rtx op0 = operands[0];
1167 /* If the instruction in the delay slot is annulled (true), then
1168 there is no delay slot where we can put it now. The only safe
1169 place for it is after the label. final will do that by default. */
1172 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Long form: a conditional branch of the opposite sense jumps over an
   unconditional "bra" to the real target, landing on a local LF label.
   This variant carries the delay-slot insn with the conditional branch.  */
1174 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1175 ASSEMBLER_DIALECT ? "/" : ".", label);
1176 print_slot (final_sequence);
1179 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1181 output_asm_insn ("bra\t%l0", &op0);
1182 fprintf (asm_out_file, "\tnop\n");
1183 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1187 /* When relaxing, handle this like a short branch. The linker
1188 will fix it up if it still doesn't fit after relaxation. */
/* Short form; the %. modifier appends the delay-slot suffix if needed.  */
1190 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1192 /* These are for SH2e, in which we have to account for the
1193 extra nop because of the hardware bug in annulled branches. */
1200 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Here an explicit nop follows both the conditional branch and the
   "bra" before the local LF label is emitted.  */
1202 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1204 ASSEMBLER_DIALECT ? "/" : ".", label);
1205 fprintf (asm_out_file, "\tnop\n");
1206 output_asm_insn ("bra\t%l0", operands);
1207 fprintf (asm_out_file, "\tnop\n");
1208 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1212 /* When relaxing, fall through. */
/* The branch mnemonic is assembled into BUFFER and then output with
   OPERANDS[0] as its label operand.  */
1217 sprintf (buffer, "b%s%ss\t%%l0",
1219 ASSEMBLER_DIALECT ? "/" : ".");
1220 output_asm_insn (buffer, &operands[0]);
1225 /* There should be no longer branches now - that would
1226 indicate that something has destroyed the branches set
1227 up in machine_dependent_reorg. */
1233 output_branchy_insn (code, template, insn, operands)
1235 const char *template;
1239 rtx next_insn = NEXT_INSN (insn);
1241 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1243 rtx src = SET_SRC (PATTERN (next_insn));
1244 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1246 /* Following branch not taken */
1247 operands[9] = gen_label_rtx ();
1248 emit_label_after (operands[9], next_insn);
1249 INSN_ADDRESSES_NEW (operands[9],
1250 INSN_ADDRESSES (INSN_UID (next_insn))
1251 + get_attr_length (next_insn));
1256 int offset = (branch_dest (next_insn)
1257 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1258 if (offset >= -252 && offset <= 258)
1260 if (GET_CODE (src) == IF_THEN_ELSE)
1262 src = XEXP (src, 1);
1268 operands[9] = gen_label_rtx ();
1269 emit_label_after (operands[9], insn);
1270 INSN_ADDRESSES_NEW (operands[9],
1271 INSN_ADDRESSES (INSN_UID (insn))
1272 + get_attr_length (insn));
1277 output_ieee_ccmpeq (insn, operands)
1278 rtx insn, *operands;
1280 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1283 /* Output to FILE the start of the assembler file. */
1286 output_file_start (file)
1289 output_file_directive (file, main_input_filename);
1291 /* Switch to the data section so that the coffsem symbol
1292 isn't in the text section. */
1295 if (TARGET_LITTLE_ENDIAN)
1296 fprintf (file, "\t.little\n");
1298 if (TARGET_SHCOMPACT)
1299 fprintf (file, "\t.mode\tSHcompact\n");
1300 else if (TARGET_SHMEDIA)
1301 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1302 TARGET_SHMEDIA64 ? 64 : 32);
1305 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1308 unspec_caller_rtx_p (pat)
1311 switch (GET_CODE (pat))
1314 return unspec_caller_rtx_p (XEXP (pat, 0));
1317 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1319 return unspec_caller_rtx_p (XEXP (pat, 1));
1321 if (XINT (pat, 1) == UNSPEC_CALLER)
1330 /* Indicate that INSN cannot be duplicated. This is true for an insn
1331 that generates a unique label. */
1334 sh_cannot_copy_insn_p (insn)
1339 if (!reload_completed || !flag_pic)
1342 if (GET_CODE (insn) != INSN)
1344 if (asm_noperands (insn) >= 0)
1347 pat = PATTERN (insn);
1348 if (GET_CODE (pat) != SET)
1350 pat = SET_SRC (pat);
1352 if (unspec_caller_rtx_p (pat))
1358 /* Actual number of instructions used to make an arithmetic right shift
   by N.  The table is indexed by the shift count N and has 32 entries
   (N = 0..31).  It is read by shift_insns_rtx, expand_ashiftrt and
   shl_sext_kind.
   NOTE(review): the entries equal to 8 presumably stand for the cost of
   the out-of-line helper sequence that expand_ashiftrt falls back to --
   confirm against the full source.  */
1359 static const char ashiftrt_insns[] =
1360 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1362 /* Number of instructions needed for a constant shift by N, indexed by
   N = 0..31 (32 entries).  Left shift and logical right shift are the
   same, so one table serves both.  Entry N is also the number of
   elements of shift_amounts[N] that gen_shifty_op emits.
   NOTE(review): sh_dynamicalize_shift_p and shl_and_scr_length index this
   table with an INTVAL that is not masked in the code visible here;
   confirm that their callers guarantee a value in 0..31.  */
1363 static const char shift_insns[] =
1364 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1366 /* Individual shift amounts needed to get the above length sequences.
1367 One bit right shifts clobber the T bit, so when possible, put one bit
1368 shifts in the middle of the sequence, so the ends are eligible for
1369 branch delay slots.
   Row N lists the component shifts for a total shift of N; the entries
   of every row sum to N, and shift_insns[N] gives how many entries of
   the row are used (the remainder are zero-initialized padding).
   NOTE(review): negative entries presumably denote a shift in the
   opposite direction (gen_ashift mentions handling negative values from
   this array) -- confirm against the full source.  */
1370 static const short shift_amounts[32][5] = {
1371 {0}, {1}, {2}, {2, 1},
1372 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1373 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1374 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1375 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1376 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1377 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1378 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1380 /* Likewise, but for shift amounts < 16, up to three highmost bits
1381 might be clobbered. This is typically used when combined with some
1382 kind of sign or zero extension.
   Indexed by the shift count N = 0..31 (32 entries); entry N is the
   number of elements of ext_shift_amounts[N] that gen_shifty_hi_op
   emits.  */
1384 static const char ext_shift_insns[] =
1385 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Component shift amounts matching ext_shift_insns: row N lists the
   individual shifts whose sum is N, and ext_shift_insns[N] gives how many
   entries of the row are used.  Rows 6, 7, 13, 14 and 15 differ from
   shift_amounts by overshooting and then shifting back with negative
   amounts; the other rows are the same as in shift_amounts.
   NOTE(review): negative entries presumably denote a shift in the
   opposite direction -- confirm against gen_ashift in the full source.  */
1387 static const short ext_shift_amounts[32][4] = {
1388 {0}, {1}, {2}, {2, 1},
1389 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1390 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1391 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1392 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1393 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1394 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1395 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1397 /* Assuming we have a value that has been sign-extended by at least one bit,
1398 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1399 to shift it by N without data loss, and quicker than by other means?
   The test ((n) | 8) == 15 holds exactly for N == 7 and N == 15.  */
1400 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1402 /* This is used in length attributes in sh.md to help compute the length
1403 of arbitrary constant shift instructions. */
1406 shift_insns_rtx (insn)
1409 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1410 int shift_count = INTVAL (XEXP (set_src, 1));
1411 enum rtx_code shift_code = GET_CODE (set_src);
1416 return ashiftrt_insns[shift_count];
1419 return shift_insns[shift_count];
1425 /* Return the cost of a shift. */
1436 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1438 if (GET_MODE (x) == DImode
1439 && GET_CODE (XEXP (x, 1)) == CONST_INT
1440 && INTVAL (XEXP (x, 1)) == 1)
1443 /* Everything else is invalid, because there is no pattern for it. */
1446 /* If shift by a non constant, then this will be expensive. */
1447 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1448 return SH_DYNAMIC_SHIFT_COST;
1450 value = INTVAL (XEXP (x, 1));
1452 /* Otherwise, return the true cost in instructions. */
1453 if (GET_CODE (x) == ASHIFTRT)
1455 int cost = ashiftrt_insns[value];
1456 /* If SH3, then we put the constant in a reg and use shad. */
1457 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1458 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1462 return shift_insns[value];
1465 /* Return the cost of an AND operation. */
1473 /* Anding with a register is a single cycle and instruction. */
1474 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1477 i = INTVAL (XEXP (x, 1));
1481 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1482 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1483 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1489 /* These constants are single cycle extu.[bw] instructions. */
1490 if (i == 0xff || i == 0xffff)
1492 /* Constants that can be used in an and immediate instruction are a single
1493 cycle, but this requires r0, so make it a little more expensive. */
1494 if (CONST_OK_FOR_L (i))
1496 /* Constants that can be loaded with a mov immediate and an and.
1497 This case is probably unnecessary. */
1498 if (CONST_OK_FOR_I (i))
1500 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1501 This case is probably unnecessary. */
1505 /* Return the cost of an addition or a subtraction. */
1511 /* Adding a register is a single cycle insn. */
1512 if (GET_CODE (XEXP (x, 1)) == REG
1513 || GET_CODE (XEXP (x, 1)) == SUBREG)
1516 /* Likewise for small constants. */
1517 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1518 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1522 switch (GET_CODE (XEXP (x, 1)))
1527 return TARGET_SHMEDIA64 ? 5 : 3;
1530 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1532 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
1534 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1542 /* Any other constant requires a 2 cycle pc-relative load plus an
1547 /* Return the cost of a multiply. */
1550 rtx x ATTRIBUTE_UNUSED;
1557 /* We have a mul insn, so we can never take more than the mul and the
1558 read of the mac reg, but count more because of the latency and extra
1560 if (TARGET_SMALLCODE)
1565 /* If we're aiming at small code, then just count the number of
1566 insns in a multiply call sequence. */
1567 if (TARGET_SMALLCODE)
1570 /* Otherwise count all the insns in the routine we'd be calling too. */
1574 /* Compute a (partial) cost for rtx X. Return true if the complete
1575 cost has been computed, and false if subexpressions should be
1576 scanned. In either case, *TOTAL contains the cost result. */
1579 sh_rtx_costs (x, code, outer_code, total)
1581 int code, outer_code, *total;
1588 if (INTVAL (x) == 0)
1590 else if (outer_code == AND && and_operand ((x), DImode))
1592 else if ((outer_code == IOR || outer_code == XOR
1593 || outer_code == PLUS)
1594 && CONST_OK_FOR_P (INTVAL (x)))
1596 else if (CONST_OK_FOR_J (INTVAL (x)))
1597 *total = COSTS_N_INSNS (outer_code != SET);
1598 else if (CONST_OK_FOR_J (INTVAL (x) >> 16))
1599 *total = COSTS_N_INSNS (2);
1600 else if (CONST_OK_FOR_J ((INTVAL (x) >> 16) >> 16))
1601 *total = COSTS_N_INSNS (3);
1603 *total = COSTS_N_INSNS (4);
1606 if (CONST_OK_FOR_I (INTVAL (x)))
1608 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1609 && CONST_OK_FOR_L (INTVAL (x)))
1618 if (TARGET_SHMEDIA64)
1619 *total = COSTS_N_INSNS (4);
1620 else if (TARGET_SHMEDIA32)
1621 *total = COSTS_N_INSNS (2);
1628 *total = COSTS_N_INSNS (4);
1634 *total = COSTS_N_INSNS (addsubcosts (x));
1638 *total = COSTS_N_INSNS (andcosts (x));
1642 *total = COSTS_N_INSNS (multcosts (x));
1648 *total = COSTS_N_INSNS (shiftcosts (x));
1655 *total = COSTS_N_INSNS (20);
1668 /* Compute the cost of an address. For the SH, all valid addresses are
1669 the same cost. Use a slightly higher cost for reg + reg addressing,
1670 since it increases pressure on r0. */
1676 return (GET_CODE (X) == PLUS
1677 && ! CONSTANT_P (XEXP (X, 1))
1678 && ! TARGET_SHMEDIA ? 1 : 0);
1681 /* Code to expand a shift. */
1684 gen_ashift (type, n, reg)
1689 /* Negative values here come from the shift_amounts array. */
1702 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1706 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1708 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1711 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1716 /* Same for HImode */
1719 gen_ashift_hi (type, n, reg)
1724 /* Negative values here come from the shift_amounts array. */
1738 /* We don't have HImode right shift operations because using the
1739 ordinary 32 bit shift instructions for that doesn't generate proper
1740 zero/sign extension.
1741 gen_ashift_hi is only called in contexts where we know that the
1742 sign extension works out correctly. */
1745 if (GET_CODE (reg) == SUBREG)
1747 offset = SUBREG_BYTE (reg);
1748 reg = SUBREG_REG (reg);
1750 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1754 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1759 /* Output RTL to split a constant shift into its component SH constant
1760 shift instructions. */
1763 gen_shifty_op (code, operands)
1767 int value = INTVAL (operands[2]);
1770 /* Truncate the shift count in case it is out of bounds. */
1771 value = value & 0x1f;
1775 if (code == LSHIFTRT)
1777 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1778 emit_insn (gen_movt (operands[0]));
1781 else if (code == ASHIFT)
1783 /* There is a two instruction sequence for 31 bit left shifts,
1784 but it requires r0. */
1785 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1787 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1788 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1793 else if (value == 0)
1795 /* This can happen when not optimizing. We must output something here
1796 to prevent the compiler from aborting in final.c after the try_split
1798 emit_insn (gen_nop ());
1802 max = shift_insns[value];
1803 for (i = 0; i < max; i++)
1804 gen_ashift (code, shift_amounts[value][i], operands[0]);
1807 /* Same as above, but optimized for values where the topmost bits don't
1811 gen_shifty_hi_op (code, operands)
1815 int value = INTVAL (operands[2]);
1817 void (*gen_fun) PARAMS ((int, int, rtx));
1819 /* This operation is used by and_shl for SImode values with a few
1820 high bits known to be cleared. */
1824 emit_insn (gen_nop ());
1828 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1831 max = ext_shift_insns[value];
1832 for (i = 0; i < max; i++)
1833 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1836 /* When shifting right, emit the shifts in reverse order, so that
1837 solitary negative values come first. */
1838 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1839 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1842 /* Output RTL for an arithmetic right shift. */
1844 /* ??? Rewrite to use super-optimizer sequences. */
1847 expand_ashiftrt (operands)
1858 if (GET_CODE (operands[2]) != CONST_INT)
1860 rtx count = copy_to_mode_reg (SImode, operands[2]);
1861 emit_insn (gen_negsi2 (count, count));
1862 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1865 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1866 > 1 + SH_DYNAMIC_SHIFT_COST)
1869 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1870 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1874 if (GET_CODE (operands[2]) != CONST_INT)
1877 value = INTVAL (operands[2]) & 31;
1881 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1884 else if (value >= 16 && value <= 19)
1886 wrk = gen_reg_rtx (SImode);
1887 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1890 gen_ashift (ASHIFTRT, 1, wrk);
1891 emit_move_insn (operands[0], wrk);
1894 /* Expand a short sequence inline; for longer ones, call a magic routine. */
1895 else if (value <= 5)
1897 wrk = gen_reg_rtx (SImode);
1898 emit_move_insn (wrk, operands[1]);
1900 gen_ashift (ASHIFTRT, 1, wrk);
1901 emit_move_insn (operands[0], wrk);
1905 wrk = gen_reg_rtx (Pmode);
1907 /* Load the value into an arg reg and call a helper. */
1908 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1909 sprintf (func, "__ashiftrt_r4_%d", value);
1910 func_name = get_identifier (func);
1911 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1912 emit_move_insn (wrk, sym);
1913 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1914 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1919 sh_dynamicalize_shift_p (count)
1922 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1925 /* Try to find a good way to implement the combiner pattern
1926 [(set (match_operand:SI 0 "register_operand" "r")
1927 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1928 (match_operand:SI 2 "const_int_operand" "n"))
1929 (match_operand:SI 3 "const_int_operand" "n"))) .
1930 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1931 return 0 for simple right / left or left/right shift combination.
1932 return 1 for a combination of shifts with zero_extend.
1933 return 2 for a combination of shifts with an AND that needs r0.
1934 return 3 for a combination of shifts with an AND that needs an extra
1935 scratch register, when the three highmost bits of the AND mask are clear.
1936 return 4 for a combination of shifts with an AND that needs an extra
1937 scratch register, when any of the three highmost bits of the AND mask
1939 If ATTRP is set, store an initial right shift width in ATTRP[0],
1940 and the instruction length in ATTRP[1] . These values are not valid
1942 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1943 shift_amounts for the last shift value that is to be used before the
1946 shl_and_kind (left_rtx, mask_rtx, attrp)
1947 rtx left_rtx, mask_rtx;
1950 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1951 int left = INTVAL (left_rtx), right;
1953 int cost, best_cost = 10000;
1954 int best_right = 0, best_len = 0;
1958 if (left < 0 || left > 31)
1960 if (GET_CODE (mask_rtx) == CONST_INT)
1961 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1963 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1964 /* Can this be expressed as a right shift / left shift pair ? */
1965 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1966 right = exact_log2 (lsb);
1967 mask2 = ~(mask + lsb - 1);
1968 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1969 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1971 best_cost = shift_insns[right] + shift_insns[right + left];
1972 /* mask has no trailing zeroes <==> ! right */
1973 else if (! right && mask2 == ~(lsb2 - 1))
1975 int late_right = exact_log2 (lsb2);
1976 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1978 /* Try to use zero extend */
1979 if (mask2 == ~(lsb2 - 1))
1983 for (width = 8; width <= 16; width += 8)
1985 /* Can we zero-extend right away? */
1986 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1989 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1990 if (cost < best_cost)
2001 /* ??? Could try to put zero extend into initial right shift,
2002 or even shift a bit left before the right shift. */
2003 /* Determine value of first part of left shift, to get to the
2004 zero extend cut-off point. */
2005 first = width - exact_log2 (lsb2) + right;
2006 if (first >= 0 && right + left - first >= 0)
2008 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2009 + ext_shift_insns[right + left - first];
2010 if (cost < best_cost)
2022 /* Try to use r0 AND pattern */
2023 for (i = 0; i <= 2; i++)
2027 if (! CONST_OK_FOR_L (mask >> i))
2029 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2030 if (cost < best_cost)
2035 best_len = cost - 1;
2038 /* Try to use a scratch register to hold the AND operand. */
2039 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2040 for (i = 0; i <= 2; i++)
2044 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
2045 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2046 if (cost < best_cost)
2051 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
2057 attrp[0] = best_right;
2058 attrp[1] = best_len;
2063 /* This is used in length attributes of the unnamed instructions
2064 corresponding to shl_and_kind return values of 1 and 2. */
2066 shl_and_length (insn)
2069 rtx set_src, left_rtx, mask_rtx;
2072 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2073 left_rtx = XEXP (XEXP (set_src, 0), 1);
2074 mask_rtx = XEXP (set_src, 1);
2075 shl_and_kind (left_rtx, mask_rtx, attributes);
2076 return attributes[1];
2079 /* This is used in length attribute of the and_shl_scratch instruction. */
2082 shl_and_scr_length (insn)
2085 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2086 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2087 rtx op = XEXP (set_src, 0);
2088 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2089 op = XEXP (XEXP (op, 0), 0);
2090 return len + shift_insns[INTVAL (XEXP (op, 1))];
2093 /* Generating rtl? */
2094 extern int rtx_equal_function_value_matters;
2096 /* Generate rtl for instructions for which shl_and_kind advised a particular
2097 method of generating them, i.e. returned zero. */
2100 gen_shl_and (dest, left_rtx, mask_rtx, source)
2101 rtx dest, left_rtx, mask_rtx, source;
2104 unsigned HOST_WIDE_INT mask;
2105 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2106 int right, total_shift;
2107 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2109 right = attributes[0];
2110 total_shift = INTVAL (left_rtx) + right;
2111 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2118 int first = attributes[2];
2123 emit_insn ((mask << right) <= 0xff
2124 ? gen_zero_extendqisi2(dest,
2125 gen_lowpart (QImode, source))
2126 : gen_zero_extendhisi2(dest,
2127 gen_lowpart (HImode, source)));
2131 emit_insn (gen_movsi (dest, source));
2135 operands[2] = GEN_INT (right);
2136 gen_shifty_hi_op (LSHIFTRT, operands);
2140 operands[2] = GEN_INT (first);
2141 gen_shifty_hi_op (ASHIFT, operands);
2142 total_shift -= first;
2146 emit_insn (mask <= 0xff
2147 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2148 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2149 if (total_shift > 0)
2151 operands[2] = GEN_INT (total_shift);
2152 gen_shifty_hi_op (ASHIFT, operands);
2157 shift_gen_fun = gen_shifty_op;
2159 /* If the topmost bit that matters is set, set the topmost bits
2160 that don't matter. This way, we might be able to get a shorter
2162 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2163 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2165 /* Don't expand fine-grained when combining, because that will
2166 make the pattern fail. */
2167 if (rtx_equal_function_value_matters
2168 || reload_in_progress || reload_completed)
2172 /* Cases 3 and 4 should be handled by this split
2173 only while combining */
2178 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2181 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2186 operands[2] = GEN_INT (total_shift);
2187 shift_gen_fun (ASHIFT, operands);
2194 if (kind != 4 && total_shift < 16)
2196 neg = -ext_shift_amounts[total_shift][1];
2198 neg -= ext_shift_amounts[total_shift][2];
2202 emit_insn (gen_and_shl_scratch (dest, source,
2205 GEN_INT (total_shift + neg),
2207 emit_insn (gen_movsi (dest, dest));
2214 /* Try to find a good way to implement the combiner pattern
2215 [(set (match_operand:SI 0 "register_operand" "=r")
2216 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2217 (match_operand:SI 2 "const_int_operand" "n")
2218 (match_operand:SI 3 "const_int_operand" "n")
2220 (clobber (reg:SI T_REG))]
2221 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2222 return 0 for simple left / right shift combination.
2223 return 1 for left shift / 8 bit sign extend / left shift.
2224 return 2 for left shift / 16 bit sign extend / left shift.
2225 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2226 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2227 return 5 for left shift / 16 bit sign extend / right shift
2228 return 6 for < 8 bit sign extend / left shift.
2229 return 7 for < 8 bit sign extend / left shift / single right shift.
2230 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2233 shl_sext_kind (left_rtx, size_rtx, costp)
2234 rtx left_rtx, size_rtx;
2237 int left, size, insize, ext;
2238 int cost = 0, best_cost;
2241 left = INTVAL (left_rtx);
2242 size = INTVAL (size_rtx);
2243 insize = size - left;
2246 /* Default to left / right shift. */
2248 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2251 /* 16 bit shift / sign extend / 16 bit shift */
2252 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2253 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2254 below, by alternative 3 or something even better. */
2255 if (cost < best_cost)
2261 /* Try a plain sign extend between two shifts. */
2262 for (ext = 16; ext >= insize; ext -= 8)
2266 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2267 if (cost < best_cost)
2269 kind = ext / (unsigned) 8;
2273 /* Check if we can do a sloppy shift with a final signed shift
2274 restoring the sign. */
2275 if (EXT_SHIFT_SIGNED (size - ext))
2276 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2277 /* If not, maybe it's still cheaper to do the second shift sloppy,
2278 and do a final sign extend? */
2279 else if (size <= 16)
2280 cost = ext_shift_insns[ext - insize] + 1
2281 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2284 if (cost < best_cost)
2286 kind = ext / (unsigned) 8 + 2;
2290 /* Check if we can sign extend in r0 */
2293 cost = 3 + shift_insns[left];
2294 if (cost < best_cost)
2299 /* Try the same with a final signed shift. */
2302 cost = 3 + ext_shift_insns[left + 1] + 1;
2303 if (cost < best_cost)
2312 /* Try to use a dynamic shift. */
2313 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2314 if (cost < best_cost)
2325 /* Function to be used in the length attribute of the instructions
2326 implementing this pattern. */
2329 shl_sext_length (insn)
2332 rtx set_src, left_rtx, size_rtx;
2335 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2336 left_rtx = XEXP (XEXP (set_src, 0), 1);
2337 size_rtx = XEXP (set_src, 1);
2338 shl_sext_kind (left_rtx, size_rtx, &cost);
2342 /* Generate rtl for this pattern */
2345 gen_shl_sext (dest, left_rtx, size_rtx, source)
2346 rtx dest, left_rtx, size_rtx, source;
2349 int left, size, insize, cost;
2352 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2353 left = INTVAL (left_rtx);
2354 size = INTVAL (size_rtx);
2355 insize = size - left;
2363 int ext = kind & 1 ? 8 : 16;
2364 int shift2 = size - ext;
2366 /* Don't expand fine-grained when combining, because that will
2367 make the pattern fail. */
2368 if (! rtx_equal_function_value_matters
2369 && ! reload_in_progress && ! reload_completed)
2371 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2372 emit_insn (gen_movsi (dest, source));
2376 emit_insn (gen_movsi (dest, source));
2380 operands[2] = GEN_INT (ext - insize);
2381 gen_shifty_hi_op (ASHIFT, operands);
2384 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2385 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2390 operands[2] = GEN_INT (shift2);
2391 gen_shifty_op (ASHIFT, operands);
2398 if (EXT_SHIFT_SIGNED (shift2))
2400 operands[2] = GEN_INT (shift2 + 1);
2401 gen_shifty_op (ASHIFT, operands);
2402 operands[2] = GEN_INT (1);
2403 gen_shifty_op (ASHIFTRT, operands);
2406 operands[2] = GEN_INT (shift2);
2407 gen_shifty_hi_op (ASHIFT, operands);
2411 operands[2] = GEN_INT (-shift2);
2412 gen_shifty_hi_op (LSHIFTRT, operands);
2414 emit_insn (size <= 8
2415 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2416 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2423 if (! rtx_equal_function_value_matters
2424 && ! reload_in_progress && ! reload_completed)
2425 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2429 operands[2] = GEN_INT (16 - insize);
2430 gen_shifty_hi_op (ASHIFT, operands);
2431 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2433 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2435 gen_ashift (ASHIFTRT, 1, dest);
2440 /* Don't expand fine-grained when combining, because that will
2441 make the pattern fail. */
2442 if (! rtx_equal_function_value_matters
2443 && ! reload_in_progress && ! reload_completed)
2445 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2446 emit_insn (gen_movsi (dest, source));
2449 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2450 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2451 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2453 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2454 gen_shifty_op (ASHIFT, operands);
2456 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2464 /* Prefix a symbol_ref name with "datalabel". */
2467 gen_datalabel_ref (sym)
2470 if (GET_CODE (sym) == LABEL_REF)
2471 return gen_rtx_CONST (GET_MODE (sym),
2472 gen_rtx_UNSPEC (GET_MODE (sym),
2476 if (GET_CODE (sym) != SYMBOL_REF)
2483 /* The SH cannot load a large constant into a register, constants have to
2484 come from a pc relative load. The reference of a pc relative load
1485 instruction must be less than 1k in front of the instruction. This
2486 means that we often have to dump a constant inside a function, and
2487 generate code to branch around it.
2489 It is important to minimize this, since the branches will slow things
2490 down and make things bigger.
2492 Worst case code looks like:
2510 We fix this by performing a scan before scheduling, which notices which
2511 instructions need to have their operands fetched from the constant table
2512 and builds the table.
2516 scan, find an instruction which needs a pcrel move. Look forward, find the
2517 last barrier which is within MAX_COUNT bytes of the requirement.
2518 If there isn't one, make one. Process all the instructions between
2519 the find and the barrier.
2521 In the above example, we can tell that L3 is within 1k of L1, so
2522 the first move can be shrunk from the 3 insn+constant sequence into
2523 just 1 insn, and the constant moved to L3 to make:
2534 Then the second move becomes the target for the shortening process. */
2538 rtx value; /* Value in table. */
2539 rtx label; /* Label of value. */
2540 rtx wend; /* End of window. */
2541 enum machine_mode mode; /* Mode of value. */
2543 /* True if this constant is accessed as part of a post-increment
2544 sequence. Note that HImode constants are never accessed in this way. */
2545 bool part_of_sequence_p;
2548 /* The maximum number of constants that can fit into one pool, since
2549 the pc relative range is 0...1020 bytes and constants are at least 4
/* 1020 / 4 = 255 entries: the pool limit used to size pool_vector.  */
2552 #define MAX_POOL_SIZE (1020/4)
/* The constants collected for the pool currently being built.  */
2553 static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries of pool_vector in use; add_constant searches indices
   below it and stores a new constant at pool_vector[pool_size].  */
2554 static int pool_size;
/* Label most recently handed out by add_constant, or NULL_RTX; it is
   reset to NULL_RTX once the table has been emitted.  */
2555 static rtx pool_window_label;
/* Index in pool_vector of the entry that pool_window_label belongs to;
   new window-end references are chained onto that entry's wend field.  */
2556 static int pool_window_last;
2558 /* ??? If we need a constant in HImode which is the truncated value of a
2559 constant we need in SImode, we could combine the two entries thus saving
2560 two bytes. Is this common enough to be worth the effort of implementing
2563 /* ??? This stuff should be done at the same time that we shorten branches.
2564 As it is now, we must assume that all branches are the maximum size, and
2565 this causes us to almost always output constant pools sooner than
2568 /* Add a constant to the pool and return its label. */
2571 add_constant (x, mode, last_value)
2573 enum machine_mode mode;
2577 rtx lab, new, ref, newref;
2579 /* First see if we've already got it. */
2580 for (i = 0; i < pool_size; i++)
2582 if (x->code == pool_vector[i].value->code
2583 && mode == pool_vector[i].mode)
2585 if (x->code == CODE_LABEL)
2587 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2590 if (rtx_equal_p (x, pool_vector[i].value))
2595 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2597 new = gen_label_rtx ();
2598 LABEL_REFS (new) = pool_vector[i].label;
2599 pool_vector[i].label = lab = new;
2601 if (lab && pool_window_label)
2603 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2604 ref = pool_vector[pool_window_last].wend;
2605 LABEL_NEXTREF (newref) = ref;
2606 pool_vector[pool_window_last].wend = newref;
2609 pool_window_label = new;
2610 pool_window_last = i;
2616 /* Need a new one. */
2617 pool_vector[pool_size].value = x;
2618 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2621 pool_vector[pool_size - 1].part_of_sequence_p = true;
2624 lab = gen_label_rtx ();
2625 pool_vector[pool_size].mode = mode;
2626 pool_vector[pool_size].label = lab;
2627 pool_vector[pool_size].wend = NULL_RTX;
2628 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2629 if (lab && pool_window_label)
2631 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2632 ref = pool_vector[pool_window_last].wend;
2633 LABEL_NEXTREF (newref) = ref;
2634 pool_vector[pool_window_last].wend = newref;
2637 pool_window_label = lab;
2638 pool_window_last = pool_size;
2643 /* Output the literal table. */
/* The table is emitted after SCAN from the entries of pool_vector.
   HImode entries go out first; the wider entries follow in a second
   pass, and the table is closed with a consttable_end insn and a
   barrier.  Each entry's labels are emitted just before its constant,
   and any window-end references recorded in its WEND list follow it.  */
2654 /* Do two passes, first time dump out the HI sized constants. */
2656 for (i = 0; i < pool_size; i++)
2658 pool_node *p = &pool_vector[i];
2660 if (p->mode == HImode)
2664 scan = emit_insn_after (gen_align_2 (), scan);
2667 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2668 scan = emit_label_after (lab, scan);
2669 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2671 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2673 lab = XEXP (ref, 0);
2674 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2677 else if (p->mode == DFmode)
/* NOTE(review): have_df is presumably set when a DFmode entry was seen
   above - confirm.  With TARGET_FMOVD and TARGET_ALIGN_DOUBLE the wider
   entries are emitted after an 8-byte alignment (gen_align_log 3).  */
2683 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2685 rtx align_insn = NULL_RTX;
2687 scan = emit_label_after (gen_label_rtx (), scan);
2688 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2691 for (i = 0; i < pool_size; i++)
2693 pool_node *p = &pool_vector[i];
/* An alignment insn is pending and this entry is not part of a
   sequence: emit its labels, a 4-byte constant and its window-end
   markers in front of the alignment insn, then delete that insn.  */
2701 if (align_insn && !p->part_of_sequence_p)
2703 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2704 emit_label_before (lab, align_insn);
2705 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2707 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2709 lab = XEXP (ref, 0);
2710 emit_insn_before (gen_consttable_window_end (lab),
2713 delete_insn (align_insn);
2714 align_insn = NULL_RTX;
2719 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2720 scan = emit_label_after (lab, scan);
2721 scan = emit_insn_after (gen_consttable_4 (p->value,
2723 need_align = ! need_align;
2729 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2734 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2735 scan = emit_label_after (lab, scan);
2736 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2744 if (p->mode != HImode)
2746 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2748 lab = XEXP (ref, 0);
2749 scan = emit_insn_after (gen_consttable_window_end (lab),
/* Pass over the pool emitting 4- and 8-byte constants after a 4-byte
   alignment (gen_align_4).  */
2758 for (i = 0; i < pool_size; i++)
2760 pool_node *p = &pool_vector[i];
2771 scan = emit_label_after (gen_label_rtx (), scan);
2772 scan = emit_insn_after (gen_align_4 (), scan);
2774 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2775 scan = emit_label_after (lab, scan);
2776 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2784 scan = emit_label_after (gen_label_rtx (), scan);
2785 scan = emit_insn_after (gen_align_4 (), scan);
2787 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2788 scan = emit_label_after (lab, scan);
2789 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2797 if (p->mode != HImode)
2799 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2801 lab = XEXP (ref, 0);
2802 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Close the table and reset the pool window state.  */
2807 scan = emit_insn_after (gen_consttable_end (), scan);
2808 scan = emit_barrier_after (scan);
2810 pool_window_label = NULL_RTX;
2811 pool_window_last = 0;
2814 /* Return nonzero if constant would be an ok source for a
2815 mov.w instead of a mov.l. */
/* That is: SRC must be a CONST_INT whose value lies in the signed
   16-bit range -32768..32767.  */
2821 return (GET_CODE (src) == CONST_INT
2822 && INTVAL (src) >= -32768
2823 && INTVAL (src) <= 32767);
2826 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2828 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2829 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2830 need to fix it if the input value is CONST_OK_FOR_I. */
/* As far as the condition below shows, a move needs fixing when its
   SET (taken from element 0 of a PARALLEL, if there is one) has a
   destination that is not QImode and a source that is CONSTANT_P or a
   mova_const UNSPEC, unless the source is a CONST_INT accepted by
   CONST_OK_FOR_I.
   NOTE(review): the CONST_DOUBLE clause about fp_zero_operand /
   fp_one_operand looks like an exemption for FP-register loads that
   fldi can perform - confirm the sense of that test.  */
2836 if (GET_CODE (insn) == INSN)
2838 rtx pat = PATTERN (insn);
2839 if (GET_CODE (pat) == PARALLEL)
2840 pat = XVECEXP (pat, 0, 0);
2841 if (GET_CODE (pat) == SET
2842 /* We can load any 8 bit value if we don't care what the high
2843 order bits end up as. */
2844 && GET_MODE (SET_DEST (pat)) != QImode
2845 && (CONSTANT_P (SET_SRC (pat))
2846 /* Match mova_const. */
2847 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2848 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2849 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2851 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2852 && (fp_zero_operand (SET_SRC (pat))
2853 || fp_one_operand (SET_SRC (pat)))
2854 /* ??? If this is a -m4 or -m4-single compilation, in general
2855 we don't know the current setting of fpscr, so disable fldi.
2856 There is an exception if this was a register-register move
2857 before reload - and hence it was ascertained that we have
2858 single precision setting - and in a post-reload optimization
2859 we changed this to do a constant load. In that case
2860 we don't have an r0 clobber, hence we must use fldi. */
2861 && (! TARGET_SH4 || TARGET_FMOVD
2862 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2864 && GET_CODE (SET_DEST (pat)) == REG
2865 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2866 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2867 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* Nonzero if INSN is an INSN whose pattern is a SET with an
   UNSPEC_MOVA source whose first operand is a LABEL_REF.  A
   mova_const, whose operand is a CONST, therefore does not match.  */
2878 return (GET_CODE (insn) == INSN
2879 && GET_CODE (PATTERN (insn)) == SET
2880 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2881 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2882 /* Don't match mova_const. */
2883 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
/* find_barrier arguments, as used below: NUM_MOVA seeds leading_mova,
   MOVA is the mova insn that gets turned into a plain load when it
   cannot be kept in range (the search is then restarted from MOVA with
   no pending mova), and FROM is the insn where scanning starts.  The
   value returned is the barrier that was found or created.  */
2886 /* Find the last barrier from insn FROM which is close enough to hold the
2887 constant pool. If we can't find one, then create one near the end of
2891 find_barrier (num_mova, mova, from)
2902 int leading_mova = num_mova;
2903 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2907 /* For HImode: range is 510, add 4 because pc counts from address of
2908 second instruction after this one, subtract 2 for the jump instruction
2909 that we may need to emit before the table, subtract 2 for the instruction
2910 that fills the jump delay slot (in very rare cases, reorg will take an
2911 instruction from after the constant pool or will leave the delay slot
2912 empty). This gives 510.
2913 For SImode: range is 1020, add 4 because pc counts from address of
2914 second instruction after this one, subtract 2 in case pc is 2 byte
2915 aligned, subtract 2 for the jump instruction that we may need to emit
2916 before the table, subtract 2 for the instruction that fills the jump
2917 delay slot. This gives 1018. */
2919 /* The branch will always be shortened now that the reference address for
2920 forward branches is the successor address, thus we need no longer make
2921 adjustments to the [sh]i_limit for -O0. */
/* Scan forward while both running counts stay below their limits.  */
2926 while (from && count_si < si_limit && count_hi < hi_limit)
2928 int inc = get_attr_length (from);
2931 if (GET_CODE (from) == CODE_LABEL)
2934 new_align = 1 << label_to_alignment (from);
2935 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2936 new_align = 1 << barrier_align (from);
2942 if (GET_CODE (from) == BARRIER)
2945 found_barrier = from;
2947 /* If we are at the end of the function, or in front of an alignment
2948 instruction, we need not insert an extra alignment. We prefer
2949 this kind of barrier. */
2950 if (barrier_align (from) > 2)
2951 good_barrier = from;
2954 if (broken_move (from))
2957 enum machine_mode mode;
2959 pat = PATTERN (from);
2960 if (GET_CODE (pat) == PARALLEL)
2961 pat = XVECEXP (pat, 0, 0);
2962 src = SET_SRC (pat);
2963 dst = SET_DEST (pat);
2964 mode = GET_MODE (dst);
2966 /* We must explicitly check the mode, because sometimes the
2967 front end will generate code to load unsigned constants into
2968 HImode targets without properly sign extending them. */
2970 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2973 /* We put the short constants before the long constants, so
2974 we must count the length of short constants in the range
2975 for the long constants. */
2976 /* ??? This isn't optimal, but is easy to do. */
2981 /* We dump DF/DI constants before SF/SI ones, because
2982 the limit is the same, but the alignment requirements
2983 are higher. We may waste up to 4 additional bytes
2984 for alignment, and the DF/DI constant may have
2985 another SF/SI constant placed before it. */
2986 if (TARGET_SHCOMPACT
2988 && (mode == DFmode || mode == DImode))
2993 while (si_align > 2 && found_si + si_align - 2 > count_si)
2995 if (found_si > count_si)
2996 count_si = found_si;
2997 found_si += GET_MODE_SIZE (mode);
2999 si_limit -= GET_MODE_SIZE (mode);
3002 /* See the code in machine_dependent_reorg, which has a similar if
3003 statement that generates a new mova insn in many cases. */
3004 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3014 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3016 if (found_si > count_si)
3017 count_si = found_si;
3019 else if (GET_CODE (from) == JUMP_INSN
3020 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3021 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3025 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3027 /* We have just passed the barrier in front of the
3028 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3029 the ADDR_DIFF_VEC is accessed as data, just like our pool
3030 constants, this is a good opportunity to accommodate what
3031 we have gathered so far.
3032 If we waited any longer, we could end up at a barrier in
3033 front of code, which gives worse cache usage for separated
3034 instruction / data caches. */
3035 good_barrier = found_barrier;
3040 rtx body = PATTERN (from);
3041 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3044 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3045 else if (GET_CODE (from) == JUMP_INSN
3047 && ! TARGET_SMALLCODE)
/* A stricter alignment eats into the remaining range: lower the limit
   and round the running count up to the new alignment.  */
3053 if (new_align > si_align)
3055 si_limit -= (count_si - 1) & (new_align - si_align);
3056 si_align = new_align;
3058 count_si = (count_si + new_align - 1) & -new_align;
3063 if (new_align > hi_align)
3065 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3066 hi_align = new_align;
3068 count_hi = (count_hi + new_align - 1) & -new_align;
3070 from = NEXT_INSN (from);
3077 /* Try as we might, the leading mova is out of range. Change
3078 it into a load (which will become a pcload) and retry. */
3079 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3080 INSN_CODE (mova) = -1;
3081 return find_barrier (0, 0, mova);
3085 /* Insert the constant pool table before the mova instruction,
3086 to prevent the mova label reference from going out of range. */
3088 good_barrier = found_barrier = barrier_before_mova;
3094 if (good_barrier && next_real_insn (found_barrier))
3095 found_barrier = good_barrier;
3099 /* We didn't find a barrier in time to dump our stuff,
3100 so we'll make one. */
/* The barrier is made by emitting a jump to a fresh label, a barrier
   after the jump, and the label after the barrier.  */
3101 rtx label = gen_label_rtx ();
3103 /* If we exceeded the range, then we must back up over the last
3104 instruction we looked at. Otherwise, we just need to undo the
3105 NEXT_INSN at the end of the loop. */
3106 if (count_hi > hi_limit || count_si > si_limit)
3107 from = PREV_INSN (PREV_INSN (from));
3109 from = PREV_INSN (from);
3111 /* Walk back to be just before any jump or label.
3112 Putting it before a label reduces the number of times the branch
3113 around the constant pool table will be hit. Putting it before
3114 a jump makes it more likely that the bra delay slot will be
3116 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3117 || GET_CODE (from) == CODE_LABEL)
3118 from = PREV_INSN (from);
3120 from = emit_jump_insn_after (gen_jump (label), from);
3121 JUMP_LABEL (from) = label;
3122 LABEL_NUSES (label) = 1;
3123 found_barrier = emit_barrier_after (from);
3124 emit_label_after (label, found_barrier);
3127 return found_barrier;
3130 /* If the instruction INSN is implemented by a special function, and we can
3131 positively find the register that is used to call the sfunc, and this
3132 register is not used anywhere else in this instruction - except as the
3133 destination of a set, return this register; else, return 0. */
3135 sfunc_uses_reg (insn)
3139 rtx pattern, part, reg_part, reg;
3141 if (GET_CODE (insn) != INSN)
3143 pattern = PATTERN (insn);
3144 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Search the PARALLEL, from its last element down to element 1, for a
   USE of an SImode operand.  */
3147 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3149 part = XVECEXP (pattern, 0, i);
3150 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3155 reg = XEXP (reg_part, 0);
/* Check the other elements for mentions of REG.  The USE itself and
   CLOBBERs are singled out by the first test; for a SET whose
   destination is a REG only the source is examined.  */
3156 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3158 part = XVECEXP (pattern, 0, i);
3159 if (part == reg_part || GET_CODE (part) == CLOBBER)
3161 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3162 && GET_CODE (SET_DEST (part)) == REG)
3163 ? SET_SRC (part) : part)))
/* noncall_uses_reg arguments: REG is the register of interest, INSN the
   insn being examined and SET an output parameter.  Insns recognized by
   sfunc_uses_reg and insns that are not CALL_INSNs are dealt with
   before the general checks on the call pattern.  */
3169 /* See if the only way in which INSN uses REG is by calling it, or by
3170 setting it while calling it. Set *SET to a SET rtx if the register
3174 noncall_uses_reg (reg, insn, set)
3183 reg2 = sfunc_uses_reg (insn);
3184 if (reg2 && REGNO (reg2) == REGNO (reg))
3186 pattern = single_set (insn);
3188 && GET_CODE (SET_DEST (pattern)) == REG
3189 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3193 if (GET_CODE (insn) != CALL_INSN)
3195 /* We don't use rtx_equal_p because we don't care if the mode is
3197 pattern = single_set (insn);
3199 && GET_CODE (SET_DEST (pattern)) == REG
3200 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3206 par = PATTERN (insn);
3207 if (GET_CODE (par) == PARALLEL)
3208 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3210 part = XVECEXP (par, 0, i);
3211 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3214 return reg_mentioned_p (reg, SET_SRC (pattern));
3220 pattern = PATTERN (insn);
3222 if (GET_CODE (pattern) == PARALLEL)
3226 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3227 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3229 pattern = XVECEXP (pattern, 0, 0);
3232 if (GET_CODE (pattern) == SET)
3234 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3236 /* We don't use rtx_equal_p, because we don't care if the
3237 mode is different. */
3238 if (GET_CODE (SET_DEST (pattern)) != REG
3239 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3245 pattern = SET_SRC (pattern);
/* What remains is tested for being a CALL through a MEM whose address
   is exactly REG.  */
3248 if (GET_CODE (pattern) != CALL
3249 || GET_CODE (XEXP (pattern, 0)) != MEM
3250 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3256 /* Given X, a pattern of an insn or a part of it, return a mask of used
3257 general registers. Bits 0..15 mean that the respective registers
3258 are used as inputs in the instruction. Bits 16..31 mean that the
3259 registers 0..15, respectively, are used as outputs, or are clobbered.
3260 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3262 regs_used (x, is_dest)
3271 code = GET_CODE (x);
/* NOTE(review): presumably the register case of a dispatch on CODE.
   The mask has one bit per hard register that X's mode occupies,
   shifted to REGNO (X) plus IS_DEST.  */
3276 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3277 << (REGNO (x) + is_dest));
3281 rtx y = SUBREG_REG (x);
3283 if (GET_CODE (y) != REG)
3286 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3288 subreg_regno_offset (REGNO (y),
3291 GET_MODE (x)) + is_dest));
3295 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3297 /* If there was a return value, it must have been indicated with USE. */
/* Walk the operands according to the rtx format string, accumulating
   the masks of vector operands (XVECLEN / XVECEXP) and of 'e'
   operands.  */
3312 fmt = GET_RTX_FORMAT (code);
3314 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3319 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3320 used |= regs_used (XVECEXP (x, i, j), is_dest);
3322 else if (fmt[i] == 'e')
3323 used |= regs_used (XEXP (x, i), is_dest);
3328 /* Create an instruction that prevents redirection of a conditional branch
3329 to the destination of the JUMP with address ADDR.
3330 If the branch needs to be implemented as an indirect jump, try to find
3331 a scratch register for it.
3332 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3333 If any preceding insn that doesn't fit into a delay slot is good enough,
3334 pass 1. Pass 2 if a definite blocking insn is needed.
3335 -1 is used internally to avoid deep recursion.
3336 If a blocking instruction is made or recognized, return it. */
3339 gen_block_redirect (jump, addr, need_block)
3341 int addr, need_block;
3344 rtx prev = prev_nonnote_insn (jump);
3347 /* First, check if we already have an instruction that satisfies our need. */
3348 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3350 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3352 if (GET_CODE (PATTERN (prev)) == USE
3353 || GET_CODE (PATTERN (prev)) == CLOBBER
3354 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3356 else if ((need_block &= ~1) < 0)
3358 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3361 /* We can't use JUMP_LABEL here because it might be undefined
3362 when not optimizing. */
3363 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3364 /* If the branch is out of range, try to find a scratch register for it. */
3366 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3370 /* Don't look for the stack pointer as a scratch register,
3371 it would cause trouble if an interrupt occurred. */
3372 unsigned try = 0x7fff, used;
3373 int jump_left = flag_expensive_optimizations + 1;
3375 /* It is likely that the most recent eligible instruction is wanted for
3376 the delay slot. Therefore, find out which registers it uses, and
3377 try to avoid using them. */
3379 for (scan = jump; (scan = PREV_INSN (scan)); )
3383 if (INSN_DELETED_P (scan))
3385 code = GET_CODE (scan);
3386 if (code == CODE_LABEL || code == JUMP_INSN)
3389 && GET_CODE (PATTERN (scan)) != USE
3390 && GET_CODE (PATTERN (scan)) != CLOBBER
3391 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3393 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump target.  USED accumulates the register
   masks of the insns seen; DEAD collects registers that appear as
   outputs (bits 16..31 of the mask) without having appeared as inputs.
   Simple jumps are followed while jump_left allows.  */
3397 for (used = dead = 0, scan = JUMP_LABEL (jump);
3398 (scan = NEXT_INSN (scan)); )
3402 if (INSN_DELETED_P (scan))
3404 code = GET_CODE (scan);
3405 if (GET_RTX_CLASS (code) == 'i')
3407 used |= regs_used (PATTERN (scan), 0);
3408 if (code == CALL_INSN)
3409 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3410 dead |= (used >> 16) & ~used;
3416 if (code == JUMP_INSN)
3418 if (jump_left-- && simplejump_p (scan))
3419 scan = JUMP_LABEL (scan);
3425 /* Mask out the stack pointer again, in case it was
3426 the only 'free' register we have found. */
3429 /* If the immediate destination is still in range, check for possible
3430 threading with a jump beyond the delay slot insn.
3431 Don't check if we are called recursively; the jump has been or will be
3432 checked in a different invocation then. */
3434 else if (optimize && need_block >= 0)
3436 rtx next = next_active_insn (next_active_insn (dest));
3437 if (next && GET_CODE (next) == JUMP_INSN
3438 && GET_CODE (PATTERN (next)) == SET
3439 && recog_memoized (next) == CODE_FOR_jump_compact)
3441 dest = JUMP_LABEL (next);
3443 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3445 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* dead & -dead isolates the lowest set bit of DEAD, so the scratch
   register chosen is the lowest-numbered dead register.  */
3451 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3453 /* It would be nice if we could convert the jump into an indirect
3454 jump / far branch right now, and thus exposing all constituent
3455 instructions to further optimization. However, reorg uses
3456 simplejump_p to determine if there is an unconditional jump where
3457 it should try to schedule instructions from the target of the
3458 branch; simplejump_p fails for indirect jumps even if they have
3460 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3461 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3463 /* ??? We would like this to have the scope of the jump, but that
3464 scope will change when a delay slot insn of an inner scope is added.
3465 Hence, after delay slot scheduling, we'll have to expect
3466 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3469 INSN_SCOPE (insn) = INSN_SCOPE (jump);
3470 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3473 else if (need_block)
3474 /* We can't use JUMP_LABEL here because it might be undefined
3475 when not optimizing. */
3476 return emit_insn_before (gen_block_branch_redirect
3477 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
/* Bounds for conditional jumps.  NOTE(review): presumably displacement
   limits in bytes - confirm at the points of use.  */
3482 #define CONDJUMP_MIN -252
3483 #define CONDJUMP_MAX 262
/* One record per far branch; records are chained through PREV.  */
3486 /* A label (to be placed) in front of the jump
3487 that jumps to our ultimate destination. */
3489 /* Where we are going to insert it if we cannot move the jump any farther,
3490 or the jump itself if we have picked up an existing jump. */
3492 /* The ultimate destination. */
3494 struct far_branch *prev;
3495 /* If the branch has already been created, its address;
3496 else the address of its first prospective user. */
/* gen_far_branch emits the far branch described by BP right after
   BP->insert_place.  The resulting order is: the original insn,
   BP->near_label, the far jump (a jump to BP->far_label, or a return),
   an optional barrier, and a new label.  The original insn is inverted
   so that it branches to the new label.  */
3500 static void gen_far_branch PARAMS ((struct far_branch *));
3501 enum mdep_reorg_phase_e mdep_reorg_phase;
3504 struct far_branch *bp;
3506 rtx insn = bp->insert_place;
3508 rtx label = gen_label_rtx ();
3510 emit_label_after (label, insn);
3513 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3514 LABEL_NUSES (bp->far_label)++;
3517 jump = emit_jump_insn_after (gen_return (), insn);
3518 /* Emit a barrier so that reorg knows that any following instructions
3519 are not reachable via a fall-through path.
3520 But don't do this when not optimizing, since we wouldn't suppress the
3521 alignment for the barrier then, and could end up with out-of-range
3522 pc-relative loads. */
3524 emit_barrier_after (jump);
3525 emit_label_after (bp->near_label, insn);
3526 JUMP_LABEL (jump) = bp->far_label;
3527 if (! invert_jump (insn, label, 1))
3530 (gen_stuff_delay_slot
3531 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3532 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3534 /* Prevent reorg from undoing our splits. */
/* Note that BP->address is advanced by 2 as a side effect here.  */
3535 gen_block_redirect (jump, bp->address += 2, 2);
3538 /* Fix up ADDR_DIFF_VECs. */
3540 fixup_addr_diff_vecs (first)
/* Visit every ADDR_DIFF_VEC jump insn in the chain starting at FIRST.  */
3545 for (insn = first; insn; insn = NEXT_INSN (insn))
3547 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3549 if (GET_CODE (insn) != JUMP_INSN
3550 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3552 pat = PATTERN (insn);
3553 vec_lab = XEXP (XEXP (pat, 0), 0);
3555 /* Search for the matching casesi_jump_2. */
3556 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3558 if (GET_CODE (prev) != JUMP_INSN)
3560 prevpat = PATTERN (prev);
3561 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3563 x = XVECEXP (prevpat, 0, 1);
3564 if (GET_CODE (x) != USE)
3567 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3571 /* Emit the reference label of the braf where it belongs, right after
3572 the casesi_jump_2 (i.e. braf). */
3573 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3574 emit_label_after (braf_label, prev);
3576 /* Fix up the ADDR_DIFF_VEC to be relative
3577 to the reference address of the braf. */
3578 XEXP (XEXP (pat, 0), 0) = braf_label;
3582 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3583 a barrier. Return the base 2 logarithm of the desired alignment. */
3585 barrier_align (barrier_or_label)
3586 rtx barrier_or_label;
3588 rtx next = next_real_insn (barrier_or_label), pat, prev;
3589 int slot, credit, jump_to_next = 0;
3594 pat = PATTERN (next);
3596 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3599 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3600 /* This is a barrier in front of a constant table. */
/* Look at the real insn in front of the barrier: an ADDR_DIFF_VEC there
   means the barrier follows a jump table.  */
3603 prev = prev_real_insn (barrier_or_label);
3604 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3606 pat = PATTERN (prev);
3607 /* If this is a very small table, we want to keep the alignment after
3608 the table to the minimum for proper code alignment. */
3609 return ((TARGET_SMALLCODE
3610 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3611 <= (unsigned)1 << (CACHE_LOG - 2)))
3612 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3615 if (TARGET_SMALLCODE)
3618 if (! TARGET_SH2 || ! optimize)
3619 return align_jumps_log;
3621 /* When fixing up pcloads, a constant table might be inserted just before
3622 the basic block that ends with the barrier. Thus, we can't trust the
3623 instruction lengths before that. */
3624 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3626 /* Check if there is an immediately preceding branch to the insn beyond
3627 the barrier. We must weigh the cost of discarding useful information
3628 from the current cache line when executing this branch and there is
3629 an alignment, against that of fetching unneeded insn in front of the
3630 branch target when there is no alignment. */
3632 /* There are two delay_slot cases to consider. One is the simple case
3633 where the preceding branch is to the insn beyond the barrier (simple
3634 delay slot filling), and the other is where the preceding branch has
3635 a delay slot that is a duplicate of the insn after the barrier
3636 (fill_eager_delay_slots) and the branch is to the insn after the insn
3637 after the barrier. */
3639 /* PREV is presumed to be the JUMP_INSN for the barrier under
3640 investigation. Skip to the insn before it. */
3641 prev = prev_real_insn (prev);
/* Walk backwards over the preceding INSNs, starting with a CREDIT of
   (1 << (CACHE_LOG - 2)) + 2 that is reduced by the length of each
   insn counted.  */
3643 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3644 credit >= 0 && prev && GET_CODE (prev) == INSN;
3645 prev = prev_real_insn (prev))
3648 if (GET_CODE (PATTERN (prev)) == USE
3649 || GET_CODE (PATTERN (prev)) == CLOBBER)
3651 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3653 prev = XVECEXP (PATTERN (prev), 0, 1);
3654 if (INSN_UID (prev) == INSN_UID (next))
3656 /* Delay slot was filled with insn at jump target. */
3663 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3665 credit -= get_attr_length (prev);
3668 && GET_CODE (prev) == JUMP_INSN
3669 && JUMP_LABEL (prev))
3673 || next_real_insn (JUMP_LABEL (prev)) == next
3674 /* If relax_delay_slots() decides NEXT was redundant
3675 with some previous instruction, it will have
3676 redirected PREV's jump to the following insn. */
3677 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3678 /* There is no upper bound on redundant instructions
3679 that might have been skipped, but we must not put an
3680 alignment where none had been before. */
3681 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3683 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3684 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3685 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3687 rtx pat = PATTERN (prev);
3688 if (GET_CODE (pat) == PARALLEL)
3689 pat = XVECEXP (pat, 0, 0);
3690 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3696 return align_jumps_log;
3699 /* If we are inside a phony loop, almost any kind of label can turn up as the
3700 first one in the loop. Aligning a braf label causes incorrect switch
3701 destination addresses; we can detect braf labels because they are
3702 followed by a BARRIER.
3703 Applying loop alignment to small constant or switch tables is a waste
3704 of space, so we suppress this too. */
3706 sh_loop_align (label)
/* Advance past notes and any labels that follow.  */
3712 next = next_nonnote_insn (next);
3713 while (next && GET_CODE (next) == CODE_LABEL);
3717 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3718 || recog_memoized (next) == CODE_FOR_consttable_2)
3721 return align_loops_log;
3724 /* Do a final pass over the function, just before delayed branch
3730 rtx first, insn, mova = NULL_RTX;
3732 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3733 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3735 first = get_insns ();
3737 /* We must split call insns before introducing `mova's. If we're
3738 optimizing, they'll have already been split. Otherwise, make
3739 sure we don't split them too late. */
3741 split_all_insns_noflow ();
3746 /* If relaxing, generate pseudo-ops to associate function calls with
3747 the symbols they call. It does no harm to not generate these
3748 pseudo-ops. However, when we can generate them, it enables the
3749 linker to potentially relax the jsr to a bsr, and eliminate the
3750 register load and, possibly, the constant pool entry. */
3752 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3755 /* Remove all REG_LABEL notes. We want to use them for our own
3756 purposes. This works because none of the remaining passes
3757 need to look at them.
3759 ??? But it may break in the future. We should use a machine
3760 dependent REG_NOTE, or some other approach entirely. */
3761 for (insn = first; insn; insn = NEXT_INSN (insn))
3767 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3768 remove_note (insn, note);
3772 for (insn = first; insn; insn = NEXT_INSN (insn))
3774 rtx pattern, reg, link, set, scan, dies, label;
3775 int rescan = 0, foundinsn = 0;
3777 if (GET_CODE (insn) == CALL_INSN)
3779 pattern = PATTERN (insn);
3781 if (GET_CODE (pattern) == PARALLEL)
3782 pattern = XVECEXP (pattern, 0, 0);
3783 if (GET_CODE (pattern) == SET)
3784 pattern = SET_SRC (pattern);
3786 if (GET_CODE (pattern) != CALL
3787 || GET_CODE (XEXP (pattern, 0)) != MEM)
3790 reg = XEXP (XEXP (pattern, 0), 0);
3794 reg = sfunc_uses_reg (insn);
3799 if (GET_CODE (reg) != REG)
3802 /* This is a function call via REG. If the only uses of REG
3803 between the time that it is set and the time that it dies
3804 are in function calls, then we can associate all the
3805 function calls with the setting of REG. */
3807 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3809 if (REG_NOTE_KIND (link) != 0)
3811 set = single_set (XEXP (link, 0));
3812 if (set && rtx_equal_p (reg, SET_DEST (set)))
3814 link = XEXP (link, 0);
3821 /* ??? Sometimes global register allocation will have
3822 deleted the insn pointed to by LOG_LINKS. Try
3823 scanning backward to find where the register is set. */
3824 for (scan = PREV_INSN (insn);
3825 scan && GET_CODE (scan) != CODE_LABEL;
3826 scan = PREV_INSN (scan))
3828 if (! INSN_P (scan))
3831 if (! reg_mentioned_p (reg, scan))
3834 if (noncall_uses_reg (reg, scan, &set))
3848 /* The register is set at LINK. */
3850 /* We can only optimize the function call if the register is
3851 being set to a symbol. In theory, we could sometimes
3852 optimize calls to a constant location, but the assembler
3853 and linker do not support that at present. */
3854 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3855 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3858 /* Scan forward from LINK to the place where REG dies, and
3859 make sure that the only insns which use REG are
3860 themselves function calls. */
3862 /* ??? This doesn't work for call targets that were allocated
3863 by reload, since there may not be a REG_DEAD note for the
3867 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3871 /* Don't try to trace forward past a CODE_LABEL if we haven't
3872 seen INSN yet. Ordinarily, we will only find the setting insn
3873 in LOG_LINKS if it is in the same basic block. However,
3874 cross-jumping can insert code labels in between the load and
3875 the call, and can result in situations where a single call
3876 insn may have two targets depending on where we came from. */
3878 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3881 if (! INSN_P (scan))
3884 /* Don't try to trace forward past a JUMP. To optimize
3885 safely, we would have to check that all the
3886 instructions at the jump destination did not use REG. */
3888 if (GET_CODE (scan) == JUMP_INSN)
3891 if (! reg_mentioned_p (reg, scan))
3894 if (noncall_uses_reg (reg, scan, &scanset))
3901 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3903 /* There is a function call to this register other
3904 than the one we are checking. If we optimize
3905 this call, we need to rescan again below. */
3909 /* ??? We shouldn't have to worry about SCANSET here.
3910 We should just be able to check for a REG_DEAD note
3911 on a function call. However, the REG_DEAD notes are
3912 apparently not dependable around libcalls; c-torture
3913 execute/920501-2 is a test case. If SCANSET is set,
3914 then this insn sets the register, so it must have
3915 died earlier. Unfortunately, this will only handle
3916 the cases in which the register is, in fact, set in a
3919 /* ??? We shouldn't have to use FOUNDINSN here.
3920 However, the LOG_LINKS fields are apparently not
3921 entirely reliable around libcalls;
3922 newlib/libm/math/e_pow.c is a test case. Sometimes
3923 an insn will appear in LOG_LINKS even though it is
3924 not the most recent insn which sets the register. */
3928 || find_reg_note (scan, REG_DEAD, reg)))
3937 /* Either there was a branch, or some insn used REG
3938 other than as a function call address. */
3942 /* Create a code label, and put it in a REG_LABEL note on
3943 the insn which sets the register, and on each call insn
3944 which uses the register. In final_prescan_insn we look
3945 for the REG_LABEL notes, and output the appropriate label
3948 label = gen_label_rtx ();
3949 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3951 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3960 scan = NEXT_INSN (scan);
3962 && ((GET_CODE (scan) == CALL_INSN
3963 && reg_mentioned_p (reg, scan))
3964 || ((reg2 = sfunc_uses_reg (scan))
3965 && REGNO (reg2) == REGNO (reg))))
3967 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3969 while (scan != dies);
3975 fixup_addr_diff_vecs (first);
3979 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3980 shorten_branches (first);
3982 /* Scan the function looking for move instructions which have to be
3983 changed to pc-relative loads and insert the literal tables. */
3985 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3986 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3993 else if (GET_CODE (insn) == JUMP_INSN
3994 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4002 /* Some code might have been inserted between the mova and
4003 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4004 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4005 total += get_attr_length (scan);
4007 /* range of mova is 1020, add 4 because pc counts from address of
4008 second instruction after this one, subtract 2 in case pc is 2
4009 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4010 cancels out with alignment effects of the mova itself. */
4013 /* Change the mova into a load, and restart scanning
4014 there. broken_move will then return true for mova. */
4015 SET_SRC (PATTERN (mova))
4016 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4017 INSN_CODE (mova) = -1;
4021 if (broken_move (insn))
4024 /* Scan ahead looking for a barrier to stick the constant table
4026 rtx barrier = find_barrier (num_mova, mova, insn);
4027 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4029 if (num_mova && ! mova_p (mova))
4031 /* find_barrier had to change the first mova into a
4032 pcload; thus, we have to start with this new pcload. */
4036 /* Now find all the moves between the points and modify them. */
4037 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4039 if (GET_CODE (scan) == CODE_LABEL)
4041 if (broken_move (scan))
4043 rtx *patp = &PATTERN (scan), pat = *patp;
4047 enum machine_mode mode;
4049 if (GET_CODE (pat) == PARALLEL)
4050 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4051 src = SET_SRC (pat);
4052 dst = SET_DEST (pat);
4053 mode = GET_MODE (dst);
4055 if (mode == SImode && hi_const (src)
4056 && REGNO (dst) != FPUL_REG)
4061 while (GET_CODE (dst) == SUBREG)
4063 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4064 GET_MODE (SUBREG_REG (dst)),
4067 dst = SUBREG_REG (dst);
4069 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4072 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4074 /* This must be an insn that clobbers r0. */
4075 rtx clobber = XVECEXP (PATTERN (scan), 0,
4076 XVECLEN (PATTERN (scan), 0) - 1);
4078 if (GET_CODE (clobber) != CLOBBER
4079 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4083 && reg_set_between_p (r0_rtx, last_float_move, scan))
4087 && GET_MODE_SIZE (mode) != 4
4088 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4090 lab = add_constant (src, mode, last_float);
4092 emit_insn_before (gen_mova (lab), scan);
4095 /* There will be a REG_UNUSED note for r0 on
4096 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4097 lest reorg:mark_target_live_regs will not
4098 consider r0 to be used, and we end up with delay
4099 slot insn in front of SCAN that clobbers r0. */
4101 = find_regno_note (last_float_move, REG_UNUSED, 0);
4103 /* If we are not optimizing, then there may not be
4106 PUT_MODE (note, REG_INC);
4108 *last_float_addr = r0_inc_rtx;
4110 last_float_move = scan;
4112 newsrc = gen_rtx (MEM, mode,
4113 (((TARGET_SH4 && ! TARGET_FMOVD)
4114 || REGNO (dst) == FPUL_REG)
4117 last_float_addr = &XEXP (newsrc, 0);
4119 /* Remove the clobber of r0. */
4120 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4121 RTX_UNCHANGING_P (newsrc) = 1;
4123 /* This is a mova needing a label. Create it. */
4124 else if (GET_CODE (src) == UNSPEC
4125 && XINT (src, 1) == UNSPEC_MOVA
4126 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4128 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4129 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4130 newsrc = gen_rtx_UNSPEC (SImode,
4131 gen_rtvec (1, newsrc),
4136 lab = add_constant (src, mode, 0);
4137 newsrc = gen_rtx_MEM (mode,
4138 gen_rtx_LABEL_REF (VOIDmode, lab));
4139 RTX_UNCHANGING_P (newsrc) = 1;
4141 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4142 INSN_CODE (scan) = -1;
4145 dump_table (barrier);
4150 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4151 INSN_ADDRESSES_FREE ();
4152 split_branches (first);
4154 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4155 also has an effect on the register that holds the address of the sfunc.
4156 Insert an extra dummy insn in front of each sfunc that pretends to
4157 use this register. */
4158 if (flag_delayed_branch)
4160 for (insn = first; insn; insn = NEXT_INSN (insn))
4162 rtx reg = sfunc_uses_reg (insn);
4166 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4170 /* fpscr is not actually a user variable, but we pretend it is for the
4171 sake of the previous optimization passes, since we want it handled like
4172 one. However, we don't have any debugging information for it, so turn
4173 it into a non-user variable now. */
4175 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4177 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Map LABEL to the uid of the insn it leads to, for indexing per-uid
   tables sized by MAX_UID.  The lookup starts from next_real_insn (LABEL)
   and steps forward over insns whose uid is >= MAX_UID (newly created
   branch redirection blocking insns), since such uids cannot be used as
   indices.
   NOTE(review): the return type, parameter declarations, braces and the
   return statements of this function are not present in this excerpt;
   confirm what is returned for an undefined label and for a RETURN jump
   against the complete file.  */
4181 get_dest_uid (label, max_uid)
4185 rtx dest = next_real_insn (label);
4188 /* This can happen for an undefined label. */
4190 dest_uid = INSN_UID (dest);
4191 /* If this is a newly created branch redirection blocking instruction,
4192 we cannot index the branch_uid or insn_addresses arrays with its
4193 uid. But then, we won't need to, because the actual destination is
4194 the following branch. */
4195 while (dest_uid >= max_uid)
4197 dest = NEXT_INSN (dest);
4198 dest_uid = INSN_UID (dest);
4200 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4205 /* Split condbranches that are out of range. Also add clobbers for
4206 scratch registers that are needed in far jumps.
4207 We do this before delay slot scheduling, so that it can take our
4208 newly created instructions into account. It also allows us to
4209 find branches with common targets more easily. */
/* Outline of what is visible below: the insn chain starting at FIRST is
   walked once.  Deleted insns are turned into NOTE_INSN_DELETED notes.
   Conditional branches (TYPE_CBRANCH) whose length exceeds 4 are
   redirected to a near label that is tracked in a struct far_branch keyed
   by the destination uid.  TYPE_JUMP and TYPE_RETURN insns are recorded
   in the same table and passed to gen_block_redirect.  Pending far
   branches are generated at the end, and insn lengths are invalidated
   with init_insn_lengths because new insns were emitted.
   NOTE(review): this excerpt is missing a number of source lines
   (declarations, braces and some statements), so this outline covers
   only what can be seen here.  */
4212 split_branches (first)
4216 struct far_branch **uid_branch, *far_branch_list = 0;
4217 int max_uid = get_max_uid ();
4219 /* Find out which branches are out of range. */
4220 shorten_branches (first);
/* One far_branch slot per insn uid that existed on entry, all initially
   null; slots are filled in as far branches are discovered.  */
4222 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4223 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4225 for (insn = first; insn; insn = NEXT_INSN (insn))
4226 if (! INSN_P (insn))
4228 else if (INSN_DELETED_P (insn))
4230 /* Shorten_branches would split this instruction again,
4231 so transform it into a note. */
4232 PUT_CODE (insn, NOTE);
4233 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4234 NOTE_SOURCE_FILE (insn) = 0;
4236 else if (GET_CODE (insn) == JUMP_INSN
4237 /* Don't mess with ADDR_DIFF_VEC */
4238 && (GET_CODE (PATTERN (insn)) == SET
4239 || GET_CODE (PATTERN (insn)) == RETURN))
4241 enum attr_type type = get_attr_type (insn);
4242 if (type == TYPE_CBRANCH)
4246 if (get_attr_length (insn) > 4)
4248 rtx src = SET_SRC (PATTERN (insn));
4249 rtx olabel = XEXP (XEXP (src, 1), 0);
4250 int addr = INSN_ADDRESSES (INSN_UID (insn));
4252 int dest_uid = get_dest_uid (olabel, max_uid);
4253 struct far_branch *bp = uid_branch[dest_uid];
4255 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4256 the label if the LABEL_NUSES count drops to zero. There is
4257 always a jump_optimize pass that sets these values, but it
4258 proceeds to delete unreferenced code, and then if not
4259 optimizing, to un-delete the deleted instructions, thus
4260 leaving labels with too low uses counts. */
4263 JUMP_LABEL (insn) = olabel;
4264 LABEL_NUSES (olabel)++;
4268 bp = (struct far_branch *) alloca (sizeof *bp);
4269 uid_branch[dest_uid] = bp;
4270 bp->prev = far_branch_list;
4271 far_branch_list = bp;
4273 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4274 LABEL_NUSES (bp->far_label)++;
4278 label = bp->near_label;
4279 if (! label && bp->address - addr >= CONDJUMP_MIN)
4281 rtx block = bp->insert_place;
4283 if (GET_CODE (PATTERN (block)) == RETURN)
4284 block = PREV_INSN (block);
4286 block = gen_block_redirect (block,
4288 label = emit_label_after (gen_label_rtx (),
4290 bp->near_label = label;
4292 else if (label && ! NEXT_INSN (label))
4294 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4295 bp->insert_place = insn;
4297 gen_far_branch (bp);
4301 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4303 bp->near_label = label = gen_label_rtx ();
4304 bp->insert_place = insn;
4307 if (! redirect_jump (insn, label, 1))
4312 /* get_attr_length (insn) == 2 */
4313 /* Check if we have a pattern where reorg wants to redirect
4314 the branch to a label from an unconditional branch that
4316 /* We can't use JUMP_LABEL here because it might be undefined
4317 when not optimizing. */
4318 /* A syntax error might cause beyond to be NULL_RTX. */
4320 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4324 && (GET_CODE (beyond) == JUMP_INSN
4325 || ((beyond = next_active_insn (beyond))
4326 && GET_CODE (beyond) == JUMP_INSN))
4327 && GET_CODE (PATTERN (beyond)) == SET
4328 && recog_memoized (beyond) == CODE_FOR_jump_compact
4330 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4331 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4333 gen_block_redirect (beyond,
4334 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4337 next = next_active_insn (insn);
4339 if ((GET_CODE (next) == JUMP_INSN
4340 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4341 && GET_CODE (PATTERN (next)) == SET
4342 && recog_memoized (next) == CODE_FOR_jump_compact
4344 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4345 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4347 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4349 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4351 int addr = INSN_ADDRESSES (INSN_UID (insn));
4354 struct far_branch *bp;
4356 if (type == TYPE_JUMP)
4358 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4359 dest_uid = get_dest_uid (far_label, max_uid);
4362 /* Parse errors can lead to labels outside
4364 if (! NEXT_INSN (far_label))
4369 JUMP_LABEL (insn) = far_label;
4370 LABEL_NUSES (far_label)++;
4372 redirect_jump (insn, NULL_RTX, 1);
4376 bp = uid_branch[dest_uid];
4379 bp = (struct far_branch *) alloca (sizeof *bp);
4380 uid_branch[dest_uid] = bp;
4381 bp->prev = far_branch_list;
4382 far_branch_list = bp;
4384 bp->far_label = far_label;
4386 LABEL_NUSES (far_label)++;
4388 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4389 if (addr - bp->address <= CONDJUMP_MAX)
4390 emit_label_after (bp->near_label, PREV_INSN (insn));
4393 gen_far_branch (bp);
4399 bp->insert_place = insn;
4401 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4403 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4406 /* Generate all pending far branches,
4407 and free our references to the far labels. */
4408 while (far_branch_list)
4410 if (far_branch_list->near_label
4411 && ! NEXT_INSN (far_branch_list->near_label))
4412 gen_far_branch (far_branch_list);
4414 && far_branch_list->far_label
4415 && ! --LABEL_NUSES (far_branch_list->far_label))
4416 delete_insn (far_branch_list->far_label);
4417 far_branch_list = far_branch_list->prev;
4420 /* Instruction length information is no longer valid due to the new
4421 instructions that have been generated. */
4422 init_insn_lengths ();
4425 /* Dump out instruction addresses, which is useful for debugging the
4426 constant pool table stuff.
4428 If relaxing, output the label and pseudo-ops used to link together
4429 calls and the instruction which set the registers. */
4431 /* ??? The addresses printed by this routine for insns are nonsense for
4432 insns which are inside of a sequence where none of the inner insns have
4433 variable length. This is because the second pass of shorten_branches
4434 does not bother to update them. */
/* Details visible below: when TARGET_DUMPISIZE is set, a "! at <address>"
   line with INSN's address is written to asm_out_file.  The REG_LABEL
   note of INSN is then looked up; for a call pattern (a CALL, a SET whose
   source is a CALL, or a SET in an insn of type TYPE_SFUNC) a ".uses"
   directive naming the note's label is emitted, and for any other SET the
   label itself is emitted through the internal_label hook.  For a
   PARALLEL, only its first element is examined.  OPVEC and NOPERANDS are
   unused.
   NOTE(review): the guard between looking up the note and using it is
   not visible in this excerpt -- confirm against the complete file.  */
4437 final_prescan_insn (insn, opvec, noperands)
4439 rtx *opvec ATTRIBUTE_UNUSED;
4440 int noperands ATTRIBUTE_UNUSED;
4442 if (TARGET_DUMPISIZE)
4443 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4449 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4454 pattern = PATTERN (insn);
4455 if (GET_CODE (pattern) == PARALLEL)
4456 pattern = XVECEXP (pattern, 0, 0);
4457 if (GET_CODE (pattern) == CALL
4458 || (GET_CODE (pattern) == SET
4459 && (GET_CODE (SET_SRC (pattern)) == CALL
4460 || get_attr_type (insn) == TYPE_SFUNC)))
4461 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4462 CODE_LABEL_NUMBER (XEXP (note, 0)));
4463 else if (GET_CODE (pattern) == SET)
4464 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4465 CODE_LABEL_NUMBER (XEXP (note, 0)));
/* Emit the accumulated pool entries to asm_out_file: a ".align 2"
   directive and then, for each of the pool_size entries of pool_vector,
   the entry's internal label followed by a ".long" of its value.
   NOTE(review): the original header comment that follows is cut off in
   this excerpt (its closing text is missing), as are the function's
   return type, braces, local declarations and any guard around the
   output -- confirm against the complete file.  */
4472 /* Dump out any constants accumulated in the final pass. These will
4476 output_jump_label_table ()
4482 fprintf (asm_out_file, "\t.align 2\n");
4483 for (i = 0; i < pool_size; i++)
4485 pool_node *p = &pool_vector[i];
4487 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4488 CODE_LABEL_NUMBER (p->label));
4489 output_asm_insn (".long %O0", &p->value);
4497 /* A full frame looks like:
4501 [ if current_function_anonymous_args
4514 local-0 <- fp points here. */
4516 /* Number of bytes pushed for anonymous args, used to pass information
4517 between expand_prologue and expand_epilogue. */
/* sh_expand_epilogue adds this value into its final stack adjustment.
   NOTE(review): the place where it is assigned is not visible in this
   excerpt.  */
4519 static int extra_push;
4521 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4522 to be adjusted, and TEMP, if nonnegative, holds the register number
4523 of a general register that we may clobber. */
/* Strategy, in the order tried below:
   1. a single add when SIZE satisfies CONST_OK_FOR_ADD;
   2. two adds, the first being SIZE / 2 masked to a multiple of the
      stack alignment, so that REG stays aligned between the two;
   3. load the constant (or its negation when that is preferred) into
      register number TEMP with emit_insn, then add or subtract it.
   The add / subtract insns themselves are emitted through EMIT_FN.  When
   EMIT_FN is frame_insn, the register-based form additionally gets a
   REG_FRAME_RELATED_EXPR note expressing REG = REG + SIZE.
   NOTE(review): parameter declarations, braces and the guards on SIZE
   and TEMP are missing from this excerpt.  */
4526 output_stack_adjust (size, reg, temp, emit_fn)
4530 rtx (*emit_fn) PARAMS ((rtx));
4534 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4539 if (CONST_OK_FOR_ADD (size))
4540 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4541 /* Try to do it with two partial adjustments; however, we must make
4542 sure that the stack is properly aligned at all times, in case
4543 an interrupt occurs between the two partial adjustments. */
4544 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4545 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4547 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4548 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4555 /* If TEMP is invalid, we could temporarily save a general
4556 register to MACL. However, there is currently no need
4557 to handle this case, so just abort when we see it. */
4560 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4562 /* If SIZE is negative, subtract the positive value.
4563 This sometimes allows a constant pool entry to be shared
4564 between prologue and epilogue code. */
4567 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4568 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4572 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4573 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4575 if (emit_fn == frame_insn)
4577 = (gen_rtx_EXPR_LIST
4578 (REG_FRAME_RELATED_EXPR,
4579 gen_rtx_SET (VOIDmode, reg,
4580 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4591 RTX_FRAME_RELATED_P (x) = 1;
4595 /* Output RTL to push register RN onto the stack. */
/* The push pattern is chosen by register kind: gen_push_fpul,
   gen_push_fpscr for FPSCR_REG, gen_push_4 with a DFmode register when
   TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE and RN is an FP or XD
   register, gen_push_e with an SFmode register for FP registers on
   TARGET_SH2E, and gen_push with an SImode register otherwise.  A REG_INC
   note naming the stack pointer is then built for the result.
   NOTE(review): the function header, the condition selecting
   gen_push_fpul, the statement under the odd-FP-register test and the
   emit call are not visible in this excerpt.  */
4603 x = gen_push_fpul ();
4604 else if (rn == FPSCR_REG)
4605 x = gen_push_fpscr ();
4606 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4607 && FP_OR_XD_REGISTER_P (rn))
4609 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4611 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4613 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4614 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4616 x = gen_push (gen_rtx_REG (SImode, rn));
4620 = gen_rtx_EXPR_LIST (REG_INC,
4621 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4625 /* Output RTL to pop register RN from the stack. */
/* Mirror image of the push selection: gen_pop_fpul, gen_pop_fpscr for
   FPSCR_REG, gen_pop_4 with a DFmode register when
   TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE and RN is an FP or XD
   register, gen_pop_e with an SFmode register for FP registers on
   TARGET_SH2E, and gen_pop with an SImode register otherwise.  A REG_INC
   note naming the stack pointer is then built for the result.
   NOTE(review): the function header, the condition selecting
   gen_pop_fpul, the statement under the odd-FP-register test and the
   emit call are not visible in this excerpt.  */
4633 x = gen_pop_fpul ();
4634 else if (rn == FPSCR_REG)
4635 x = gen_pop_fpscr ();
4636 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4637 && FP_OR_XD_REGISTER_P (rn))
4639 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4641 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4643 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4644 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4646 x = gen_pop (gen_rtx_REG (SImode, rn));
4650 = gen_rtx_EXPR_LIST (REG_INC,
4651 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4654 /* Generate code to push the regs specified in the mask. */
/* MASK points to the set of hard registers to push; INTERRUPT_HANDLER is
   the caller's interrupt-handler flag (sh_expand_prologue passes
   current_function_interrupt).  Registers are visited in increasing
   number, and PR_REG is tested separately after the loop so that it is
   handled last.
   NOTE(review): the token `®_class_contents' below looks like a
   mis-encoded `&reg_class_contents' (an HTML-entity artifact) -- confirm
   against the upstream source.  Several lines (declarations, braces, the
   push calls and the definition of skip_fpscr) are also absent from this
   excerpt.  */
4657 push_regs (mask, interrupt_handler)
4659 int interrupt_handler;
4664 /* Push PR last; this gives better latencies after the prologue, and
4665 candidates for the return delay slot when there are no general
4666 registers pushed. */
4667 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4669 /* If this is an interrupt handler, and the SZ bit varies,
4670 and we have to push any floating point register, we need
4671 to switch to the correct precision first. */
4672 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4673 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4675 HARD_REG_SET unsaved;
4678 COMPL_HARD_REG_SET(unsaved, *mask);
4679 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4683 && (i != FPSCR_REG || ! skip_fpscr)
4684 && TEST_HARD_REG_BIT (*mask, i))
4687 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4691 /* Work out the registers which need to be saved, both as a mask and a
4692 count of saved words. Return the count.
4694 If doing a pragma interrupt function, then push all regs used by the
4695 function, and if we call another function (we can tell by looking at PR),
4696 make sure that all the regs it clobbers are safe too. */
/* Additional notes on what is visible below:
   - COUNT is accumulated with GET_MODE_SIZE of each saved register's
     natural mode, i.e. in the units GET_MODE_SIZE reports rather than in
     words.
   - Side effect: FPU_SINGLE_BIT may be cleared in target_flags (for
     interrupt handlers using FPSCR with TARGET_SH4 && TARGET_FMOVD, when
     saving FP registers in pairs is judged worthwhile, and whenever an XD
     register has to be saved).  sh_expand_prologue and sh_expand_epilogue
     save target_flags before calling this function and restore it later.
   - With TARGET_FMOVD on SH4 / SH5 and not TARGET_FPU_SINGLE, the partner
     (reg ^ 1) of a saved FP register is added to the mask too when it is
     not itself ever live.
   NOTE(review): a number of lines of this function are missing from this
   excerpt, including the return statement.  */
4699 calc_live_regs (live_regs_mask)
4700 HARD_REG_SET *live_regs_mask;
4704 int interrupt_handler;
4707 interrupt_handler = sh_cfun_interrupt_handler_p ();
/* NOTE(review): each iteration of this loop clears the same whole set;
   a single CLEAR_HARD_REG_SET looks sufficient -- confirm before
   simplifying.  */
4709 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4710 CLEAR_HARD_REG_SET (*live_regs_mask);
4711 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4712 && regs_ever_live[FPSCR_REG])
4713 target_flags &= ~FPU_SINGLE_BIT;
4714 /* If we can save a lot of saves by switching to double mode, do that. */
4715 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4716 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4717 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4718 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4721 target_flags &= ~FPU_SINGLE_BIT;
4724 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4725 knows how to use it. That means the pseudo originally allocated for
4726 the initial value can become the PR_MEDIA_REG hard register, as seen for
4727 execute/20010122-1.c:test9. */
4729 pr_live = regs_ever_live[PR_MEDIA_REG];
4732 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4733 pr_live = (pr_initial
4734 ? (GET_CODE (pr_initial) != REG
4735 || REGNO (pr_initial) != (PR_REG))
4736 : regs_ever_live[PR_REG]);
4738 /* Force PR to be live if the prologue has to call the SHmedia
4739 argument decoder or register saver. */
4740 if (TARGET_SHCOMPACT
4741 && ((current_function_args_info.call_cookie
4742 & ~ CALL_COOKIE_RET_TRAMP (1))
4743 || current_function_has_nonlocal_label))
4745 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4747 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4749 : (interrupt_handler && ! pragma_trapa)
4750 ? (/* Need to save all the regs ever live. */
4751 (regs_ever_live[reg]
4752 || (call_used_regs[reg]
4753 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4755 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4756 && reg != RETURN_ADDRESS_POINTER_REGNUM
4757 && reg != T_REG && reg != GBR_REG
4758 /* Push fpscr only on targets which have FPU */
4759 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4760 : (/* Only push those regs which are used and need to be saved. */
4763 && current_function_args_info.call_cookie
4764 && reg == PIC_OFFSET_TABLE_REGNUM)
4765 || (regs_ever_live[reg] && ! call_used_regs[reg])
4766 || (current_function_calls_eh_return
4767 && (reg == EH_RETURN_DATA_REGNO (0)
4768 || reg == EH_RETURN_DATA_REGNO (1)
4769 || reg == EH_RETURN_DATA_REGNO (2)
4770 || reg == EH_RETURN_DATA_REGNO (3)))))
4772 SET_HARD_REG_BIT (*live_regs_mask, reg);
4773 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4775 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4776 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4778 if (FP_REGISTER_P (reg))
4780 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4782 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4783 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4786 else if (XD_REGISTER_P (reg))
4788 /* Must switch to double mode to access these registers. */
4789 target_flags &= ~FPU_SINGLE_BIT;
4798 /* Code to generate prologue and epilogue sequences */
4800 /* PUSHED is the number of bytes that are being pushed on the
4801 stack for register saves. Return the frame size, padded
4802 appropriately so that the stack stays properly aligned. */
/* In detail: get_frame_size () plus PUSHED is rounded up to a multiple of
   STACK_BOUNDARY / BITS_PER_UNIT, and PUSHED is then subtracted back out,
   so the result is the local-frame part only.  The `& -align' rounding
   relies on the alignment being a power of two.
   NOTE(review): the parameter declaration and braces are missing from
   this excerpt.  */
4803 static HOST_WIDE_INT
4804 rounded_frame_size (pushed)
4807 HOST_WIDE_INT size = get_frame_size ();
4808 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4810 return ((size + pushed + align - 1) & -align) - pushed;
4813 /* Choose a call-clobbered target-branch register that remains
4814 unchanged along the whole function. We set it up as the return
4815 value in the prologue. */
/* Candidates are scanned from FIRST_TARGET_REG up to LAST_TARGET_REG;
   when PIC is in use and the PIC register is ever live, the first target
   register is skipped (tr0_used).  A candidate must be call-used and
   never live in the function.  Whether the function is a leaf is tested
   up front.
   NOTE(review): the return statements are not visible in this excerpt,
   so the value returned for non-leaf functions, or when no register
   qualifies, should be checked against the complete file.  */
4817 sh_media_register_for_return ()
4822 if (! current_function_is_leaf)
4825 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4827 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4828 if (call_used_regs[regno] && ! regs_ever_live[regno])
/* Emit the RTL for the function prologue.  Steps visible in this
   excerpt, in order:
   - record in current_function_interrupt whether this is an interrupt
     handler;
   - adjust the stack for pretend args and for stack_regs * 8 bytes;
   - for SHcompact, mark the PIC register live when the call cookie is
     used with PIC, preserve the incoming argument registers, and copy
     the call cookie into MACH by way of r0;
   - for SHmedia, copy PR_MEDIA_REG into the register picked by
     sh_media_register_for_return, flagged REG_MAYBE_DEAD;
   - for stdarg functions, push the argument registers;
   - compute the live register set with calc_live_regs and save those
     registers, either with the two-pass aligned / unaligned store loop
     or with push_regs;
   - initialise the PIC register with gen_GOTaddr2picreg when it is live,
     marking those insns REG_MAYBE_DEAD;
   - call the shmedia register-save helper when
     SHMEDIA_REGS_STACK_ADJUST () is nonzero;
   - restore target_flags from save_flags (calc_live_regs may have
     changed it);
   - allocate the local frame and, if needed, set the frame pointer;
   - for SHcompact, call __GCC_shcompact_incoming_args.
   NOTE(review): many lines (declarations, braces and several conditions,
   e.g. the one choosing between the store loop and push_regs) are absent
   from this excerpt, and some comments inside are cut off; treat this
   outline as a guide, not a specification.  */
4835 sh_expand_prologue ()
4837 HARD_REG_SET live_regs_mask;
4840 int save_flags = target_flags;
4842 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4844 /* We have pretend args if we had an object sent partially in registers
4845 and partially on the stack, e.g. a large structure. */
4846 output_stack_adjust (-current_function_pretend_args_size
4847 - current_function_args_info.stack_regs * 8,
4848 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4852 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4853 /* We're going to use the PIC register to load the address of the
4854 incoming-argument decoder and/or of the return trampoline from
4855 the GOT, so make sure the PIC register is preserved and
4857 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4859 if (TARGET_SHCOMPACT
4860 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4864 /* First, make all registers with incoming arguments that will
4865 be pushed onto the stack live, so that register renaming
4866 doesn't overwrite them. */
4867 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4868 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4869 >= NPARM_REGS (SImode) - reg)
4870 for (; reg < NPARM_REGS (SImode); reg++)
4871 emit_insn (gen_shcompact_preserve_incoming_args
4872 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4873 else if (CALL_COOKIE_INT_REG_GET
4874 (current_function_args_info.call_cookie, reg) == 1)
4875 emit_insn (gen_shcompact_preserve_incoming_args
4876 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4878 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4880 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4881 GEN_INT (current_function_args_info.call_cookie));
4882 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4883 gen_rtx_REG (SImode, R0_REG));
4885 else if (TARGET_SHMEDIA)
4887 int tr = sh_media_register_for_return ();
4891 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4892 gen_rtx_REG (DImode, PR_MEDIA_REG));
4894 /* If this function only exits with sibcalls, this copy
4895 will be flagged as dead. */
4896 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4902 /* Emit the code for SETUP_VARARGS. */
4903 if (current_function_stdarg)
4905 /* This is not used by the SH2E calling convention */
4906 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4908 /* Push arg regs as if they'd been provided by caller in stack. */
4909 for (i = 0; i < NPARM_REGS(SImode); i++)
4911 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4914 if (i >= (NPARM_REGS(SImode)
4915 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4919 RTX_FRAME_RELATED_P (insn) = 0;
4925 /* If we're supposed to switch stacks at function entry, do so now. */
4927 emit_insn (gen_sp_switch_1 ());
4929 d = calc_live_regs (&live_regs_mask);
4930 /* ??? Maybe we could save some switching if we can move a mode switch
4931 that already happens to be at the function start into the prologue. */
4932 if (target_flags != save_flags && ! current_function_interrupt)
4933 emit_insn (gen_toggle_sz ());
4940 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4941 int offset_in_r0 = -1;
4944 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4945 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4946 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4948 offset = d + d_rounding;
4949 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4951 /* We loop twice: first, we save 8-byte aligned registers in the
4952 higher addresses, that are known to be aligned. Then, we
4953 proceed to saving 32-bit registers that don't need 8-byte
4955 /* Note that if you change this code in a way that affects where
4956 the return register is saved, you have to update not only
4957 sh_expand_epilogue, but also sh_set_return_address. */
4958 for (align = 1; align >= 0; align--)
4959 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4960 if (TEST_HARD_REG_BIT (live_regs_mask, i))
4962 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4964 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4966 if (mode == SFmode && (i % 2) == 1
4967 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4968 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
4975 /* If we're doing the aligned pass and this is not aligned,
4976 or we're doing the unaligned pass and this is aligned,
4978 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4982 offset -= GET_MODE_SIZE (mode);
4984 reg_rtx = gen_rtx_REG (mode, reg);
4986 mem_rtx = gen_rtx_MEM (mode,
4987 gen_rtx_PLUS (Pmode,
4991 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4997 if (HAVE_PRE_DECREMENT
4998 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4999 || mem_rtx == NULL_RTX
5000 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5002 pre_dec = gen_rtx_MEM (mode,
5003 gen_rtx_PRE_DEC (Pmode, r0));
5005 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5014 offset += GET_MODE_SIZE (mode);
5018 if (mem_rtx != NULL_RTX)
5021 if (offset_in_r0 == -1)
5023 emit_move_insn (r0, GEN_INT (offset));
5024 offset_in_r0 = offset;
5026 else if (offset != offset_in_r0)
5031 GEN_INT (offset - offset_in_r0)));
5032 offset_in_r0 += offset - offset_in_r0;
5035 if (pre_dec != NULL_RTX)
5041 (Pmode, r0, stack_pointer_rtx));
5045 offset -= GET_MODE_SIZE (mode);
5046 offset_in_r0 -= GET_MODE_SIZE (mode);
5051 mem_rtx = gen_rtx_MEM (mode, r0);
5053 mem_rtx = gen_rtx_MEM (mode,
5054 gen_rtx_PLUS (Pmode,
5058 /* We must not use an r0-based address for target-branch
5059 registers or for special registers without pre-dec
5060 memory addresses, since we store their values in r0
5062 if (TARGET_REGISTER_P (i)
5063 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5064 && mem_rtx != pre_dec))
5068 if (TARGET_REGISTER_P (i)
5069 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5070 && mem_rtx != pre_dec))
5072 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5074 emit_move_insn (r0mode, reg_rtx);
5082 emit_move_insn (mem_rtx, reg_rtx);
5085 if (offset != d_rounding)
5089 push_regs (&live_regs_mask, current_function_interrupt);
5091 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5093 rtx insn = get_last_insn ();
5094 rtx last = emit_insn (gen_GOTaddr2picreg ());
5096 /* Mark these insns as possibly dead. Sometimes, flow2 may
5097 delete all uses of the PIC register. In this case, let it
5098 delete the initialization too. */
5101 insn = NEXT_INSN (insn);
5103 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5107 while (insn != last);
5110 if (SHMEDIA_REGS_STACK_ADJUST ())
5112 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5113 gen_rtx_SYMBOL_REF (Pmode,
5115 ? "__GCC_push_shmedia_regs"
5116 : "__GCC_push_shmedia_regs_nofpu"));
5117 /* This must NOT go through the PLT, otherwise mach and macl
5118 may be clobbered. */
5119 emit_insn (gen_shmedia_save_restore_regs_compact
5120 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5123 if (target_flags != save_flags && ! current_function_interrupt)
5125 rtx insn = emit_insn (gen_toggle_sz ());
5127 /* If we're lucky, a mode switch in the function body will
5128 overwrite fpscr, turning this insn dead. Tell flow this
5129 insn is ok to delete. */
5130 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5135 target_flags = save_flags;
5137 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5138 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5140 if (frame_pointer_needed)
5141 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5143 if (TARGET_SHCOMPACT
5144 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5146 /* This must NOT go through the PLT, otherwise mach and macl
5147 may be clobbered. */
5148 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5149 gen_rtx_SYMBOL_REF (Pmode,
5150 "__GCC_shcompact_incoming_args"));
5151 emit_insn (gen_shcompact_incoming_args ());
5156 sh_expand_epilogue ()
5158 HARD_REG_SET live_regs_mask;
5162 int save_flags = target_flags;
5164 int fpscr_deferred = 0;
5166 d = calc_live_regs (&live_regs_mask);
5168 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5169 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5170 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5172 frame_size = rounded_frame_size (d) - d_rounding;
5174 if (frame_pointer_needed)
5176 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5178 /* We must avoid moving the stack pointer adjustment past code
5179 which reads from the local frame, else an interrupt could
5180 occur after the SP adjustment and clobber data in the local
5182 emit_insn (gen_blockage ());
5183 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5185 else if (frame_size)
5187 /* We must avoid moving the stack pointer adjustment past code
5188 which reads from the local frame, else an interrupt could
5189 occur after the SP adjustment and clobber data in the local
5191 emit_insn (gen_blockage ());
5192 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5195 if (SHMEDIA_REGS_STACK_ADJUST ())
5197 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5198 gen_rtx_SYMBOL_REF (Pmode,
5200 ? "__GCC_pop_shmedia_regs"
5201 : "__GCC_pop_shmedia_regs_nofpu"));
5202 /* This must NOT go through the PLT, otherwise mach and macl
5203 may be clobbered. */
5204 emit_insn (gen_shmedia_save_restore_regs_compact
5205 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5208 /* Pop all the registers. */
5210 if (target_flags != save_flags && ! current_function_interrupt)
5211 emit_insn (gen_toggle_sz ());
5214 int offset = d_rounding;
5215 int offset_in_r0 = -1;
5218 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5219 int tmp_regno = R20_REG;
5221 /* We loop twice: first, we save 8-byte aligned registers in the
5222 higher addresses, that are known to be aligned. Then, we
5223 proceed to saving 32-bit registers that don't need 8-byte
5225 for (align = 0; align <= 1; align++)
5226 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5227 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5229 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5231 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5233 if (mode == SFmode && (i % 2) == 0
5234 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5235 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5241 /* If we're doing the aligned pass and this is not aligned,
5242 or we're doing the unaligned pass and this is aligned,
5244 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5248 reg_rtx = gen_rtx_REG (mode, reg);
5250 mem_rtx = gen_rtx_MEM (mode,
5251 gen_rtx_PLUS (Pmode,
5255 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5261 if (HAVE_POST_INCREMENT
5262 && (offset == offset_in_r0
5263 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5264 && mem_rtx == NULL_RTX)
5265 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5267 post_inc = gen_rtx_MEM (mode,
5268 gen_rtx_POST_INC (Pmode, r0));
5270 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5273 post_inc = NULL_RTX;
5282 if (mem_rtx != NULL_RTX)
5285 if (offset_in_r0 == -1)
5287 emit_move_insn (r0, GEN_INT (offset));
5288 offset_in_r0 = offset;
5290 else if (offset != offset_in_r0)
5295 GEN_INT (offset - offset_in_r0)));
5296 offset_in_r0 += offset - offset_in_r0;
5299 if (post_inc != NULL_RTX)
5305 (Pmode, r0, stack_pointer_rtx));
5311 offset_in_r0 += GET_MODE_SIZE (mode);
5314 mem_rtx = gen_rtx_MEM (mode, r0);
5316 mem_rtx = gen_rtx_MEM (mode,
5317 gen_rtx_PLUS (Pmode,
5321 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5322 && mem_rtx != post_inc)
5326 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5327 && mem_rtx != post_inc)
5329 insn = emit_move_insn (r0, mem_rtx);
5332 else if (TARGET_REGISTER_P (i))
5334 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5336 /* Give the scheduler a bit of freedom by using R20..R23
5337 in a round-robin fashion. Don't use R1 here because
5338 we want to use it for EH_RETURN_STACKADJ_RTX. */
5339 insn = emit_move_insn (tmp_reg, mem_rtx);
5341 if (++tmp_regno > R23_REG)
5342 tmp_regno = R20_REG;
5345 insn = emit_move_insn (reg_rtx, mem_rtx);
5347 offset += GET_MODE_SIZE (mode);
5350 if (offset != d + d_rounding)
5357 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5359 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5361 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5363 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5364 && hard_regs_intersect_p (&live_regs_mask,
5365 ®_class_contents[DF_REGS]))
5367 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5369 if (j == FIRST_FP_REG && fpscr_deferred)
5373 if (target_flags != save_flags && ! current_function_interrupt)
5374 emit_insn (gen_toggle_sz ());
5375 target_flags = save_flags;
5377 output_stack_adjust (extra_push + current_function_pretend_args_size
5379 + current_function_args_info.stack_regs * 8,
5380 stack_pointer_rtx, 7, emit_insn);
5382 if (current_function_calls_eh_return)
5383 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5384 EH_RETURN_STACKADJ_RTX));
5386 /* Switch back to the normal stack if necessary. */
5388 emit_insn (gen_sp_switch_2 ());
5390 /* Tell flow the insn that pops PR isn't dead. */
5391 /* PR_REG will never be live in SHmedia mode, and we don't need to
5392 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5393 by the return pattern. */
5394 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5395 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
/* Cached answer to whether the current function needs an epilogue:
   0 means not yet computed, -1 means the expanded epilogue was empty
   (get_insns returned NULL), and 1 means it was not.  The cache is
   reset to 0 by sh_output_function_epilogue.  */
5398 static int sh_need_epilogue_known = 0;
/* Only expand the epilogue when the cached answer is still unknown.  */
5403 if (! sh_need_epilogue_known)
5408 sh_expand_epilogue ();
5409 epilogue = get_insns ();
5411 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5413 return sh_need_epilogue_known > 0;
5416 /* Emit code to change the current function's return address to RA.
5417 TMP is available as a scratch register, if needed.
   If the return-address register (PR_MEDIA_REG for SHmedia, PR_REG
   otherwise) is not in the live register mask, RA is moved into a
   register directly and a USE is emitted for it.  Otherwise RA is
   stored to memory at frame pointer + pr_offset, using TMP to hold
   the address.  */
5420 sh_set_return_address (ra, tmp)
5423 HARD_REG_SET live_regs_mask;
5426 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5429 d = calc_live_regs (&live_regs_mask);
5431 /* If pr_reg isn't live, we can set it (or the register given in
5432 sh_media_register_for_return) directly. */
5433 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5439 int rr_regno = sh_media_register_for_return ();
5444 rr = gen_rtx_REG (DImode, rr_regno);
5447 rr = gen_rtx_REG (SImode, pr_reg);
5449 emit_insn (GEN_MOV (rr, ra));
5450 /* Tell flow the register for return isn't dead. */
5451 emit_insn (gen_rtx_USE (VOIDmode, rr));
5461 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5462 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5463 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5467 /* We loop twice: first, we save 8-byte aligned registers in the
5468 higher addresses, that are known to be aligned. Then, we
5469 proceed to saving 32-bit registers that don't need 8-byte
5471 for (align = 0; align <= 1; align++)
5472 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5473 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5475 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5477 if (mode == SFmode && (i % 2) == 0
5478 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5479 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5485 /* If we're doing the aligned pass and this is not aligned,
5486 or we're doing the unaligned pass and this is aligned,
5488 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5495 offset += GET_MODE_SIZE (mode);
5498 /* We can't find pr register. */
5502 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5503 + SHMEDIA_REGS_STACK_ADJUST ());
5506 pr_offset = rounded_frame_size (d) - d_rounding;
/* Form the address FRAME_POINTER + pr_offset in TMP, then store RA
   through it.  */
5508 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5509 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5511 tmp = gen_rtx_MEM (Pmode, tmp);
5512 emit_insn (GEN_MOV (tmp, ra));
5515 /* Clear variables at function end: reset trap_exit, pragma_interrupt,
   pragma_trapa and pragma_nosave_low_regs to 0, forget the cached
   sh_need_epilogue_known answer, and clear sp_switch.  FILE and SIZE
   are unused.  */
5518 sh_output_function_epilogue (file, size)
5519 FILE *file ATTRIBUTE_UNUSED;
5520 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5522 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5523 sh_need_epilogue_known = 0;
5524 sp_switch = NULL_RTX;
/* Expand __builtin_saveregs: dump the unnamed argument registers into a
   memory buffer and return the buffer's address (XEXP (regbuf, 0)).
   The numbers of integer and SFmode float registers to save are derived
   from current_function_args_info.arg_count.  The buffer is either
   memory at the argument pointer or a fresh stack local; when the
   number of float registers is odd, one extra word is allocated and
   the address is ORed with UNITS_PER_WORD.  Within the buffer the
   integer registers are placed at offset n_floatregs * UNITS_PER_WORD,
   and the float registers are stored walking downwards from that same
   offset.  An error is reported on subtargets other than SH2E, SH4
   and SH5.  */
5528 sh_builtin_saveregs ()
5530 /* First unnamed integer register. */
5531 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5532 /* Number of integer registers we need to save. */
5533 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5534 /* First unnamed SFmode float reg */
5535 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5536 /* Number of SFmode float regs to save. */
5537 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5540 HOST_WIDE_INT alias_set;
5546 int pushregs = n_intregs;
5548 while (pushregs < NPARM_REGS (SImode) - 1
5549 && (CALL_COOKIE_INT_REG_GET
5550 (current_function_args_info.call_cookie,
5551 NPARM_REGS (SImode) - pushregs)
5554 current_function_args_info.call_cookie
5555 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5560 if (pushregs == NPARM_REGS (SImode))
5561 current_function_args_info.call_cookie
5562 |= (CALL_COOKIE_INT_REG (0, 1)
5563 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5565 current_function_args_info.call_cookie
5566 |= CALL_COOKIE_STACKSEQ (pushregs);
5568 current_function_pretend_args_size += 8 * n_intregs;
5570 if (TARGET_SHCOMPACT)
5574 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5576 error ("__builtin_saveregs not supported by this subtarget");
5583 /* Allocate block of memory for the regs. */
5584 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5585 Or can assign_stack_local accept a 0 SIZE argument? */
5586 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5589 regbuf = gen_rtx_MEM (BLKmode,
5590 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5591 else if (n_floatregs & 1)
5595 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5596 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5597 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5598 regbuf = change_address (regbuf, BLKmode, addr);
5601 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5602 alias_set = get_varargs_alias_set ();
5603 set_mem_alias_set (regbuf, alias_set);
5606 This is optimized to only save the regs that are necessary. Explicitly
5607 named args need not be saved. */
5609 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5610 adjust_address (regbuf, BLKmode,
5611 n_floatregs * UNITS_PER_WORD),
5615 /* Return the address of the regbuf. */
5616 return XEXP (regbuf, 0);
5619 This is optimized to only save the regs that are necessary. Explicitly
5620 named args need not be saved.
5621 We explicitly build a pointer to the buffer because it halves the insn
5622 count when not optimizing (otherwise the pointer is built for each reg
5624 We emit the moves in reverse order so that we can use predecrement. */
5626 fpregs = gen_reg_rtx (Pmode);
5627 emit_move_insn (fpregs, XEXP (regbuf, 0));
5628 emit_insn (gen_addsi3 (fpregs, fpregs,
5629 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5633 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5635 emit_insn (gen_addsi3 (fpregs, fpregs,
5636 GEN_INT (-2 * UNITS_PER_WORD)));
5637 mem = gen_rtx_MEM (DFmode, fpregs);
5638 set_mem_alias_set (mem, alias_set);
5639 emit_move_insn (mem,
5640 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5642 regno = first_floatreg;
5645 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5646 mem = gen_rtx_MEM (SFmode, fpregs);
5647 set_mem_alias_set (mem, alias_set);
5648 emit_move_insn (mem,
5649 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5650 - (TARGET_LITTLE_ENDIAN != 0)));
5654 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5658 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5659 mem = gen_rtx_MEM (SFmode, fpregs);
5660 set_mem_alias_set (mem, alias_set);
5661 emit_move_insn (mem,
5662 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5665 /* Return the address of the regbuf. */
5666 return XEXP (regbuf, 0);
5669 /* Define the `__builtin_va_list' type for the ABI.
   For SH5, for targets that are neither SH2E nor SH4, and when
   TARGET_HITACHI is set, the type is simply ptr_type_node.  Otherwise
   a RECORD_TYPE is built whose fields are chained in the order
   __va_next_o, __va_next_o_limit, __va_next_fp, __va_next_fp_limit,
   __va_next_stack.  Code that walks TYPE_FIELDS (va_list_type_node)
   with TREE_CHAIN relies on exactly this order.  */
5674 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5677 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5678 return ptr_type_node;
5680 record = make_node (RECORD_TYPE);
5682 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5684 f_next_o_limit = build_decl (FIELD_DECL,
5685 get_identifier ("__va_next_o_limit"),
5687 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5689 f_next_fp_limit = build_decl (FIELD_DECL,
5690 get_identifier ("__va_next_fp_limit"),
5692 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5695 DECL_FIELD_CONTEXT (f_next_o) = record;
5696 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5697 DECL_FIELD_CONTEXT (f_next_fp) = record;
5698 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5699 DECL_FIELD_CONTEXT (f_next_stack) = record;
5701 TYPE_FIELDS (record) = f_next_o;
5702 TREE_CHAIN (f_next_o) = f_next_o_limit;
5703 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5704 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5705 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5707 layout_type (record);
5712 /* Implement `va_start' for varargs and stdarg.
   VALIST is the va_list object and NEXTARG the address of the first
   anonymous stack argument.  One early path calls
   expand_builtin_saveregs and then defers to
   std_expand_builtin_va_start.  When the target is neither SH2E nor
   SH4, or TARGET_HITACHI is set, std_expand_builtin_va_start alone is
   used.  Otherwise __builtin_saveregs is expanded and the va_list
   record is filled in: __va_next_fp gets the save-area address,
   __va_next_fp_limit and __va_next_o get that address advanced by
   UNITS_PER_WORD * nfp, __va_next_o_limit is a further
   UNITS_PER_WORD * nint beyond that, and __va_next_stack gets
   NEXTARG.  */
5715 sh_va_start (valist, nextarg)
5719 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5720 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5726 expand_builtin_saveregs ();
5727 std_expand_builtin_va_start (valist, nextarg);
5731 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5733 std_expand_builtin_va_start (valist, nextarg);
5737 f_next_o = TYPE_FIELDS (va_list_type_node);
5738 f_next_o_limit = TREE_CHAIN (f_next_o);
5739 f_next_fp = TREE_CHAIN (f_next_o_limit);
5740 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5741 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5743 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5744 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5745 valist, f_next_o_limit);
5746 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5747 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5748 valist, f_next_fp_limit);
5749 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5750 valist, f_next_stack);
5752 /* Call __builtin_saveregs. */
5753 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5754 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5755 TREE_SIDE_EFFECTS (t) = 1;
5756 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5758 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5763 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5764 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5765 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5766 TREE_SIDE_EFFECTS (t) = 1;
5767 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5769 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5770 TREE_SIDE_EFFECTS (t) = 1;
5771 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5773 nint = current_function_args_info.arg_count[SH_ARG_INT];
5778 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5779 build_int_2 (UNITS_PER_WORD * nint, 0)));
5780 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5781 TREE_SIDE_EFFECTS (t) = 1;
5782 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5784 u = make_tree (ptr_type_node, nextarg);
5785 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5786 TREE_SIDE_EFFECTS (t) = 1;
5787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5790 /* Implement `va_arg'.
   VALIST is the va_list object and TYPE the type being fetched.  SIZE
   is the size of TYPE in bytes and RSIZE is SIZE rounded up to a
   multiple of UNITS_PER_WORD.  For SH2E/SH4 (not SH5, and not with
   TARGET_HITACHI) the va_list record is consulted: code is emitted
   that selects the address of the __va_next_fp, __va_next_o or
   __va_next_stack field into ADDR_RTX at run time, and VALIST is
   redirected through that pointer before std_expand_builtin_va_arg
   fetches the value.  */
5793 sh_va_arg (valist, type)
5796 HOST_WIDE_INT size, rsize;
5797 tree tmp, pptr_type_node;
5800 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5802 size = int_size_in_bytes (type);
5803 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5804 pptr_type_node = build_pointer_type (ptr_type_node);
5807 type = build_pointer_type (type);
5809 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5811 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5812 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5814 rtx lab_false, lab_over;
5816 f_next_o = TYPE_FIELDS (va_list_type_node);
5817 f_next_o_limit = TREE_CHAIN (f_next_o);
5818 f_next_fp = TREE_CHAIN (f_next_o_limit);
5819 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5820 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5822 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5823 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5824 valist, f_next_o_limit);
5825 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5827 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5828 valist, f_next_fp_limit);
5829 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5830 valist, f_next_stack);
5834 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5835 || (TREE_CODE (type) == COMPLEX_TYPE
5836 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5841 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5844 addr_rtx = gen_reg_rtx (Pmode);
5845 lab_false = gen_label_rtx ();
5846 lab_over = gen_label_rtx ();
5851 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5852 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5854 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5856 expand_expr (next_fp_limit, NULL_RTX,
5857 Pmode, EXPAND_NORMAL),
5858 GE, const1_rtx, Pmode, 1, lab_false);
5860 if (TYPE_ALIGN (type) > BITS_PER_WORD
5861 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5862 && (n_floatregs & 1)))
5864 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5865 build_int_2 (UNITS_PER_WORD, 0));
5866 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5867 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5868 TREE_SIDE_EFFECTS (tmp) = 1;
5869 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5872 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5873 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5875 emit_move_insn (addr_rtx, r);
5877 emit_jump_insn (gen_jump (lab_over));
5879 emit_label (lab_false);
5881 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5882 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5884 emit_move_insn (addr_rtx, r);
5888 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5889 build_int_2 (rsize, 0));
5891 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5893 expand_expr (next_o_limit, NULL_RTX,
5894 Pmode, EXPAND_NORMAL),
5895 GT, const1_rtx, Pmode, 1, lab_false);
5897 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5898 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5900 emit_move_insn (addr_rtx, r);
5902 emit_jump_insn (gen_jump (lab_over));
5904 emit_label (lab_false);
5906 if (size > 4 && ! TARGET_SH4)
5908 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5909 TREE_SIDE_EFFECTS (tmp) = 1;
5910 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5913 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5914 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5916 emit_move_insn (addr_rtx, r);
5919 emit_label (lab_over);
5921 tmp = make_tree (pptr_type_node, addr_rtx);
5922 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5925 /* ??? In va-sh.h, there had been code to make values larger than
5926 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5928 result = std_expand_builtin_va_arg (valist, type);
/* NOTE(review): `addr' is not declared in the visible part of this
   function and the value assigned to it is not used below; this looks
   as if it was meant to test and update `result' -- confirm.  */
5931 #ifdef POINTERS_EXTEND_UNSIGNED
5932 if (GET_MODE (addr) != Pmode)
5933 addr = convert_memory_address (Pmode, result);
5935 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5936 set_mem_alias_set (result, get_varargs_alias_set ());
5938 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5939 argument to the varargs alias set. */
5943 /* Define the offset between two registers, one to be eliminated, and
5944 the other its replacement, at the start of a routine.
   FROM and TO are hard register numbers.  The argument pointer is
   offset from both the frame pointer and the stack pointer by the
   saved-register space plus the automatic-variable space plus 8 bytes
   per by-reference register.  For RETURN_ADDRESS_POINTER_REGNUM the
   saved registers are walked in two passes, subtracting each
   register's size from N.  target_flags is put back to its entry
   value (SAVE_FLAGS) once calc_live_regs has run; the value it had
   after that call (COPY_FLAGS) is reinstated only around the
   return-address scan.  */
5947 initial_elimination_offset (from, to)
5952 int regs_saved_rounding = 0;
5953 int total_saved_regs_space;
5954 int total_auto_space;
5955 int save_flags = target_flags;
5958 HARD_REG_SET live_regs_mask;
5959 regs_saved = calc_live_regs (&live_regs_mask);
5960 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5961 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5962 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5963 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5965 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5966 copy_flags = target_flags;
5967 target_flags = save_flags;
5969 total_saved_regs_space = regs_saved + regs_saved_rounding;
5971 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5972 return total_saved_regs_space + total_auto_space
5973 + current_function_args_info.byref_regs * 8;
5975 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5976 return total_saved_regs_space + total_auto_space
5977 + current_function_args_info.byref_regs * 8;
5979 /* Initial gap between fp and sp is 0. */
5980 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5983 if (from == RETURN_ADDRESS_POINTER_REGNUM
5984 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5988 int i, n = total_saved_regs_space;
5990 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5992 n += total_auto_space;
5994 /* If it wasn't saved, there's not much we can do. */
5995 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5998 target_flags = copy_flags;
6000 /* We loop twice: first, check 8-byte aligned registers,
6001 that are stored in the higher addresses, that are known
6002 to be aligned. Then, check 32-bit registers that don't
6003 need 8-byte alignment. */
6004 for (align = 1; align >= 0; align--)
6005 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6006 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6008 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6010 if (mode == SFmode && (i % 2) == 1
6011 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6012 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6018 /* If we're doing the aligned pass and this is not aligned,
6019 or we're doing the unaligned pass and this is aligned,
6021 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6025 n -= GET_MODE_SIZE (mode);
6029 target_flags = save_flags;
6037 return total_auto_space;
6043 /* Handle machine specific pragmas to be semi-compatible with the
   Hitachi compiler. */
/* Pragma handler: set pragma_interrupt so that the next function is
   compiled as an interrupt handler.  PFILE is unused.  */
6047 sh_pr_interrupt (pfile)
6048 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6050 pragma_interrupt = 1;
/* Pragma handler: set both pragma_interrupt and pragma_trapa.  PFILE
   is unused.  */
6055 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6057 pragma_interrupt = pragma_trapa = 1;
/* Pragma handler: set pragma_nosave_low_regs.  PFILE is unused.  */
6061 sh_pr_nosave_low_regs (pfile)
6062 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6064 pragma_nosave_low_regs = 1;
6067 /* Generate an 'interrupt_handler' attribute for function decls while pragma_interrupt is set. */
6070 sh_insert_attributes (node, attributes)
6074 if (! pragma_interrupt
6075 || TREE_CODE (node) != FUNCTION_DECL)
6078 /* We are only interested in declarations (tree code class 'd'). */
6079 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6082 /* Add an 'interrupt_handler' attribute to the front of *ATTRIBUTES. */
6083 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6088 /* Supported attributes:
6090 interrupt_handler -- specifies this function is an interrupt handler.
6092 sp_switch -- specifies an alternate stack for an interrupt handler
6095 trap_exit -- use a trapa to exit an interrupt function instead of
6096 an rte instruction. */
/* interrupt_handler takes no arguments; sp_switch and trap_exit each
   take exactly one.  All three require a declaration.  The table ends
   with an all-NULL sentinel entry.  */
6098 const struct attribute_spec sh_attribute_table[] =
6100 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6101 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6102 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6103 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6104 { NULL, 0, 0, false, false, false, NULL }
6107 /* Handle an "interrupt_handler" attribute; arguments as in
6108 struct attribute_spec.handler.
   The attribute is dropped (*NO_ADD_ATTRS set) with a warning when
   *NODE is not a FUNCTION_DECL, and with an error when compiling for
   TARGET_SHCOMPACT.  ARGS and FLAGS are unused.  */
6110 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6113 tree args ATTRIBUTE_UNUSED;
6114 int flags ATTRIBUTE_UNUSED;
6117 if (TREE_CODE (*node) != FUNCTION_DECL)
6119 warning ("`%s' attribute only applies to functions",
6120 IDENTIFIER_POINTER (name));
6121 *no_add_attrs = true;
6123 else if (TARGET_SHCOMPACT)
6125 error ("attribute interrupt_handler is not compatible with -m5-compact");
6126 *no_add_attrs = true;
6132 /* Handle an "sp_switch" attribute; arguments as in
6133 struct attribute_spec.handler.
   The attribute is dropped (*NO_ADD_ATTRS set) with a warning unless
   *NODE is a FUNCTION_DECL, pragma_interrupt is set, and the argument
   is a STRING_CST.  Otherwise the global sp_switch is set to a
   SYMBOL_REF built from that string.  FLAGS is unused.  */
6135 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6139 int flags ATTRIBUTE_UNUSED;
6142 if (TREE_CODE (*node) != FUNCTION_DECL)
6144 warning ("`%s' attribute only applies to functions",
6145 IDENTIFIER_POINTER (name));
6146 *no_add_attrs = true;
6148 else if (!pragma_interrupt)
6150 /* The sp_switch attribute only has meaning for interrupt functions. */
6151 warning ("`%s' attribute only applies to interrupt functions",
6152 IDENTIFIER_POINTER (name));
6153 *no_add_attrs = true;
6155 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6157 /* The argument must be a constant string. */
6158 warning ("`%s' attribute argument not a string constant",
6159 IDENTIFIER_POINTER (name));
6160 *no_add_attrs = true;
6164 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6165 TREE_STRING_POINTER (TREE_VALUE (args)));
6171 /* Handle a "trap_exit" attribute; arguments as in
6172 struct attribute_spec.handler.
   The attribute is dropped (*NO_ADD_ATTRS set) with a warning unless
   *NODE is a FUNCTION_DECL, pragma_interrupt is set, and the argument
   is an INTEGER_CST.  Otherwise the global trap_exit is set to the
   low part of that constant.  FLAGS is unused.  */
6174 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6178 int flags ATTRIBUTE_UNUSED;
6181 if (TREE_CODE (*node) != FUNCTION_DECL)
6183 warning ("`%s' attribute only applies to functions",
6184 IDENTIFIER_POINTER (name));
6185 *no_add_attrs = true;
6187 else if (!pragma_interrupt)
6189 /* The trap_exit attribute only has meaning for interrupt functions. */
6190 warning ("`%s' attribute only applies to interrupt functions",
6191 IDENTIFIER_POINTER (name));
6192 *no_add_attrs = true;
6194 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6196 /* The argument must be a constant integer. */
6197 warning ("`%s' attribute argument not an integer constant",
6198 IDENTIFIER_POINTER (name));
6199 *no_add_attrs = true;
6203 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
/* Look up the "interrupt_handler" attribute among the attributes of
   current_function_decl; the result is derived from that lookup.  */
6210 sh_cfun_interrupt_handler_p ()
6212 return (lookup_attribute ("interrupt_handler",
6213 DECL_ATTRIBUTES (current_function_decl))
6217 /* Predicates used by the templates. */
6219 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6220 Used only in general_movsrc_operand. MODE is unused. */
6223 system_reg_operand (op, mode)
6225 enum machine_mode mode ATTRIBUTE_UNUSED;
6237 /* Returns 1 if OP can be source of a simple move operation.
6238 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6239 invalid as are subregs of system registers.
   For a MEM, the address is inspected after looking through an outer
   CONST: a bare LABEL_REF, a LABEL_REF plus CONST_INT, and a PRE_DEC
   are each singled out.  For QImode and HImode, a SUBREG of a system
   register (see system_reg_operand) is singled out.  Everything else
   is left to general_operand.  */
6242 general_movsrc_operand (op, mode)
6244 enum machine_mode mode;
6246 if (GET_CODE (op) == MEM)
6248 rtx inside = XEXP (op, 0);
6249 if (GET_CODE (inside) == CONST)
6250 inside = XEXP (inside, 0);
6252 if (GET_CODE (inside) == LABEL_REF)
6255 if (GET_CODE (inside) == PLUS
6256 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6257 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6260 /* Only post inc allowed. */
6261 if (GET_CODE (inside) == PRE_DEC)
6265 if ((mode == QImode || mode == HImode)
6266 && (GET_CODE (op) == SUBREG
6267 && GET_CODE (XEXP (op, 0)) == REG
6268 && system_reg_operand (XEXP (op, 0), mode)))
6271 return general_operand (op, mode);
6274 /* Returns 1 if OP can be a destination of a move.
6275 Same as general_operand, except that a MEM whose address is a
   POST_INC is singled out first: only pre-decrement addressing is
   allowed for destinations. */
6278 general_movdst_operand (op, mode)
6280 enum machine_mode mode;
6282 /* Only pre dec allowed. */
6283 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6286 return general_operand (op, mode);
6289 /* Returns 1 if OP is a normal arithmetic register.
   OP must satisfy register_operand.  Its register number, taken from
   OP itself or from the REG inside a SUBREG, must not be T_REG,
   PR_REG, MACH_REG, MACL_REG or a register matching
   TARGET_REGISTER_P.  FPUL_REG is accepted only when TARGET_SH4. */
6292 arith_reg_operand (op, mode)
6294 enum machine_mode mode;
6296 if (register_operand (op, mode))
6300 if (GET_CODE (op) == REG)
6302 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6303 regno = REGNO (SUBREG_REG (op));
6307 return (regno != T_REG && regno != PR_REG
6308 && ! TARGET_REGISTER_P (regno)
6309 && (regno != FPUL_REG || TARGET_SH4)
6310 && regno != MACH_REG && regno != MACL_REG);
6315 /* Like arith_reg_operand, but for DImode destinations: forbid paradoxical DImode subregs,
6316 because this would lead to missing sign extensions when truncating from
6317 DImode to SImode.  A DImode SUBREG whose inner register is narrower
   than 8 bytes is singled out before deferring to arith_reg_operand. */
6319 arith_reg_dest (op, mode)
6321 enum machine_mode mode;
6323 if (mode == DImode && GET_CODE (op) == SUBREG
6324 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6326 return arith_reg_operand (op, mode);
/* Predicate on OP's own mode (MODE is unused): the mode's class is
   compared with MODE_INT and its size with UNITS_PER_WORD.  Before
   reload has completed there is a separate early exit; after reload
   the result is whether true_regnum (OP) is at most LAST_GENERAL_REG.
   NOTE(review): presumably meant for sub-word integer destinations in
   general registers -- confirm.  */
6330 int_gpr_dest (op, mode)
6332 enum machine_mode mode ATTRIBUTE_UNUSED;
6334 enum machine_mode op_mode = GET_MODE (op);
6336 if (GET_MODE_CLASS (op_mode) != MODE_INT
6337 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6339 if (! reload_completed)
6341 return true_regnum (op) <= LAST_GENERAL_REG;
/* Predicate for floating-point arithmetic registers.  OP must satisfy
   register_operand; its register number, taken from OP itself or from
   the REG inside a SUBREG, must then be either a pseudo
   (>= FIRST_PSEUDO_REGISTER) or satisfy FP_REGISTER_P.  */
6345 fp_arith_reg_operand (op, mode)
6347 enum machine_mode mode;
6349 if (register_operand (op, mode))
6353 if (GET_CODE (op) == REG)
6355 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6356 regno = REGNO (SUBREG_REG (op));
6360 return (regno >= FIRST_PSEUDO_REGISTER
6361 || FP_REGISTER_P (regno));
6366 /* Returns 1 if OP is a valid source operand for an arithmetic insn.
   Arithmetic registers are checked first.  Constants are then
   considered in two alternative branches: one tests for any CONST_INT
   or an operand matching EXTRA_CONSTRAINT_S, the other for a
   CONST_INT satisfying CONST_OK_FOR_I. */
6369 arith_operand (op, mode)
6371 enum machine_mode mode;
6373 if (arith_reg_operand (op, mode))
6378 /* FIXME: We should be checking whether the CONST_INT fits in a
6379 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6380 attempting to transform a sequence of two 64-bit sets of the
6381 same register from literal constants into a set and an add,
6382 when the difference is too wide for an add. */
6383 if (GET_CODE (op) == CONST_INT
6384 || EXTRA_CONSTRAINT_S (op))
6389 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6395 /* Returns 1 if OP is a valid source operand for a compare insn.
   Arithmetic registers are checked first, then operands matching
   EXTRA_CONSTRAINT_U. */
6398 arith_reg_or_0_operand (op, mode)
6400 enum machine_mode mode;
6402 if (arith_reg_operand (op, mode))
6405 if (EXTRA_CONSTRAINT_U (op))
6411 /* Return 1 if OP is a valid source operand for an SHmedia operation
6412 that takes either a register or a 6-bit immediate.  The immediate
   is a CONST_INT whose value satisfies CONST_OK_FOR_O. */
6415 shmedia_6bit_operand (op, mode)
6417 enum machine_mode mode;
6419 return (arith_reg_operand (op, mode)
6420 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6423 /* Returns 1 if OP is a valid source operand for a logical operation.
   Arithmetic registers are checked first.  A CONST_INT is then tested
   in two alternative branches, one against CONST_OK_FOR_P and the
   other against CONST_OK_FOR_L. */
6426 logical_operand (op, mode)
6428 enum machine_mode mode;
6430 if (arith_reg_operand (op, mode))
6435 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6440 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
/* Predicate for the second operand of an AND.  Anything satisfying
   logical_operand is checked first; in addition two CONST_INT masks
   are recognized, 0xffffffff and -1 shifted left by 32.
   NOTE(review): the cast binds tighter than the shift, so the second
   mask is computed by left-shifting a negative HOST_WIDE_INT, which
   ISO C leaves undefined -- confirm this is acceptable on all hosts.  */
6447 and_operand (op, mode)
6449 enum machine_mode mode;
6451 if (logical_operand (op, mode))
6454 /* Check mshflo.l / mshflhi.l opportunities. */
6457 && GET_CODE (op) == CONST_INT
6458 && (INTVAL (op) == (unsigned) 0xffffffff
6459 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6465 /* Nonzero if OP is a floating point value with value 0.0.
   OP's mode is compared with SFmode first.  The value must compare
   equal to dconst0 and must not be minus zero. */
6468 fp_zero_operand (op)
6473 if (GET_MODE (op) != SFmode)
6476 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6477 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6480 /* Nonzero if OP is a floating point value with value 1.0.
   OP's mode is compared with SFmode first; the value is then compared
   with dconst1. */
6488 if (GET_MODE (op) != SFmode)
6491 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6492 return REAL_VALUES_EQUAL (r, dconst1);
6495 /* For -m4 and -m4-single-only, mode switching is used. If we are
6496 compiling without -mfmovd, movsf_ie isn't taken into account for
6497 mode switching. We could check in machine_dependent_reorg for
6498 cases where we know we are in single precision mode, but there is
6499 no interface to find that out during reload, so we must avoid
6500 choosing an fldi alternative during reload and thus failing to
6501 allocate a scratch register for the constant loading.
   Hence the result is nonzero when not compiling for SH4, when
   TARGET_FMOVD is set, or once reload has completed. */
6505 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
/* Nonzero if OP is a MEM, or, when TARGET_SH4, a CONST_DOUBLE.  MODE
   is unused.  */
6509 tertiary_reload_operand (op, mode)
6511 enum machine_mode mode ATTRIBUTE_UNUSED;
6513 enum rtx_code code = GET_CODE (op);
6514 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Nonzero if OP is the hard register FPSCR_REG in PSImode.  MODE is
   unused.  */
6518 fpscr_operand (op, mode)
6520 enum machine_mode mode ATTRIBUTE_UNUSED;
6522 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6523 && GET_MODE (op) == PSImode);
/* Predicate with two return paths.  One defers to
   fp_arith_reg_operand.  The other accepts a REG of mode MODE that is
   either FPUL_REG or a pseudo (REGNO >= FIRST_PSEUDO_REGISTER).  */
6527 fpul_operand (op, mode)
6529 enum machine_mode mode;
6532 return fp_arith_reg_operand (op, mode);
6534 return (GET_CODE (op) == REG
6535 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6536 && GET_MODE (op) == mode);
/* Nonzero if OP is a SYMBOL_REF.  MODE is unused.  */
6540 symbol_ref_operand (op, mode)
6542 enum machine_mode mode ATTRIBUTE_UNUSED;
6544 return (GET_CODE (op) == SYMBOL_REF);
6547 /* Return the TLS type for TLS symbols, 0 for otherwise.
   The TLS type is SYMBOL_REF_TLS_MODEL (OP); only SYMBOL_REFs reach
   that lookup.  MODE is unused. */
6549 tls_symbolic_operand (op, mode)
6551 enum machine_mode mode ATTRIBUTE_UNUSED;
6553 if (GET_CODE (op) != SYMBOL_REF)
6555 return SYMBOL_REF_TLS_MODEL (op);
/* Operator predicate: OP's mode is compared with MODE, then OP is
   classified by its rtx code.
   NOTE(review): presumably accepts the commutative floating-point
   operators -- confirm against the case list.  */
6559 commutative_float_operator (op, mode)
6561 enum machine_mode mode;
6563 if (GET_MODE (op) != mode)
6565 switch (GET_CODE (op))
/* Operator predicate: OP's mode is compared with MODE, then OP is
   classified by its rtx code.
   NOTE(review): presumably accepts the non-commutative floating-point
   operators -- confirm against the case list.  */
6577 noncommutative_float_operator (op, mode)
6579 enum machine_mode mode;
6581 if (GET_MODE (op) != mode)
6583 switch (GET_CODE (op))
/* Operator predicate: OP's mode is compared with MODE, then OP is
   classified by its rtx code.
   NOTE(review): presumably accepts the unary floating-point
   operators -- confirm against the case list.  */
6595 unary_float_operator (op, mode)
6597 enum machine_mode mode;
6599 if (GET_MODE (op) != mode)
6601 switch (GET_CODE (op))
/* Operator predicate: OP's mode is compared with MODE, then OP is
   classified by its rtx code.
   NOTE(review): presumably accepts the binary floating-point
   operators -- confirm against the case list.  */
6614 binary_float_operator (op, mode)
6616 enum machine_mode mode;
6618 if (GET_MODE (op) != mode)
6620 switch (GET_CODE (op))
/* Operator predicate: OP's mode is compared with MODE, then OP is
   classified by its rtx code.
   NOTE(review): presumably accepts the binary logical operators --
   confirm against the case list.  */
6634 binary_logical_operator (op, mode)
6636 enum machine_mode mode;
6638 if (GET_MODE (op) != mode)
6640 switch (GET_CODE (op))
/* Nonzero if OP is an EQ or NE comparison whose mode is MODE.  A MODE
   of VOIDmode matches any mode.  */
6653 equality_comparison_operator (op, mode)
6655 enum machine_mode mode;
6657 return ((mode == VOIDmode || GET_MODE (op) == mode)
6658 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
/* Operator predicate: after a guard on MODE, OP is classified by its
   rtx code.
   NOTE(review): the guard fires when MODE is not VOIDmode and OP's
   mode EQUALS MODE, whereas equality_comparison_operator accepts
   exactly that case.  If the guard leads to a rejection, the
   comparison looks inverted (`!=' expected) -- confirm.  */
6661 int greater_comparison_operator (op, mode)
6663 enum machine_mode mode;
6665 if (mode != VOIDmode && GET_MODE (op) == mode)
6667 switch (GET_CODE (op))
/* Operator predicate: after a guard on MODE, OP is classified by its
   rtx code.
   NOTE(review): the guard fires when MODE is not VOIDmode and OP's
   mode EQUALS MODE, whereas equality_comparison_operator accepts
   exactly that case.  If the guard leads to a rejection, the
   comparison looks inverted (`!=' expected) -- confirm.  */
6679 int less_comparison_operator (op, mode)
6681 enum machine_mode mode;
6683 if (mode != VOIDmode && GET_MODE (op) == mode)
6685 switch (GET_CODE (op))
6697 /* Accept pseudos and branch target registers.
   OP's mode is compared with DImode.  A SUBREG and a non-REG are each
   handled before the register number is examined; the final test
   looks for a register number above LAST_VIRTUAL_REGISTER or one
   matching TARGET_REGISTER_P. */
6699 target_reg_operand (op, mode)
6701 enum machine_mode mode;
6704 || GET_MODE (op) != DImode)
6707 if (GET_CODE (op) == SUBREG)
6710 if (GET_CODE (op) != REG)
6713 /* We must protect ourselves from matching pseudos that are virtual
6714 register, because they will eventually be replaced with hardware
6715 registers that aren't branch-target registers. */
6716 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6717 || TARGET_REGISTER_P (REGNO (op)))
6723 /* Same as target_reg_operand, except that label_refs and symbol_refs
6724 are accepted before reload.  Such an operand is one of DImode or
   VOIDmode that matches EXTRA_CONSTRAINT_T; it is accepted only while
   reload_completed is still zero. */
6726 target_operand (op, mode)
6728 enum machine_mode mode;
6733 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6734 && EXTRA_CONSTRAINT_T (op))
6735 return ! reload_completed;
6737 return target_reg_operand (op, mode);
/* Predicate for a bit offset: OP is tested for being a CONST_INT, and
   the result is nonzero when I is a multiple of 8 between 8 and 56
   inclusive.  MODE is unused.
   NOTE(review): I is presumably INTVAL (OP) -- confirm.  */
6741 mextr_bit_offset (op, mode)
6743 enum machine_mode mode ATTRIBUTE_UNUSED;
6747 if (GET_CODE (op) != CONST_INT)
6750 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
/* Predicate that picks its checker from OP's code: an operand that is
   not a TRUNCATE is checked with arith_reg_operand, while a TRUNCATE
   is passed to a different predicate.  */
6754 extend_reg_operand (op, mode)
6756 enum machine_mode mode;
6758 return (GET_CODE (op) == TRUNCATE
6760 : arith_reg_operand) (op, mode);
/* Predicate that first tests whether OP's own mode is one of SImode,
   DImode, V4HImode or V2SImode, and then defers to extend_reg_operand
   with MODE.  */
6764 trunc_hi_operand (op, mode)
6766 enum machine_mode mode;
6768 enum machine_mode op_mode = GET_MODE (op);
6770 if (op_mode != SImode && op_mode != DImode
6771 && op_mode != V4HImode && op_mode != V2SImode)
6773 return extend_reg_operand (op, mode);
/* Apply an operand predicate to OP and MODE and return its result.  The
   predicate is chosen by whether OP is a TRUNCATE: arith_reg_or_0_operand
   is used when it is not; the one used for a TRUNCATE is on a line not
   visible in this excerpt.  */
6777 extend_reg_or_0_operand (op, mode)
6779 enum machine_mode mode;
6781 return (GET_CODE (op) == TRUNCATE
6783 : arith_reg_or_0_operand) (op, mode);
/* Apply an operand predicate to OP and MODE and return its result.  The
   predicate is chosen by whether OP is a TRUNCATE: nonimmediate_operand
   is used when it is not; the one used for a TRUNCATE is on a line not
   visible in this excerpt.  */
6787 general_extend_operand (op, mode)
6789 enum machine_mode mode;
6791 return (GET_CODE (op) == TRUNCATE
6793 : nonimmediate_operand) (op, mode);
/* Predicate on a TRUNCATE operand.  OP is first tested for being a
   TRUNCATE whose mode is MODE (the statement taken when it is not is
   elided; presumably a rejection).  The final result is nonzero when OP
   is a REG whose number satisfies FP_REGISTER_P.
   NOTE(review): OP is presumably replaced by the TRUNCATE's operand on an
   elided line before the final test -- confirm.  */
6797 inqhi_operand (op, mode)
6799 enum machine_mode mode;
6801 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6804 /* Can't use true_regnum here because copy_cost wants to know about
6805 SECONDARY_INPUT_RELOAD_CLASS. */
6806 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
/* Test whether vector V consists of one repeated element (or, for
   byte-sized units, one repeated pair of elements).  V is checked for
   being a CONST_VECTOR or PARALLEL whose mode is MODE, or of any mode
   when MODE is VOIDmode.  The return statements are on lines not visible
   in this excerpt.
   NOTE(review): GET_MODE_UNIT_SIZE is applied to MODE, which the mode
   check in this function allows to be VOIDmode -- confirm that is
   intended.  */
6810 sh_rep_vec (v, mode)
6812 enum machine_mode mode;
6817 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6818 || (GET_MODE (v) != mode && mode != VOIDmode))
6820 i = XVECLEN (v, 0) - 2;
/* X is the last element of V.  */
6821 x = XVECEXP (v, 0, i + 1);
6822 if (GET_MODE_UNIT_SIZE (mode) == 1)
/* Byte-sized units: Y is the second-to-last element, and the remaining
   elements are compared two at a time against the pair (Y, X).  */
6824 y = XVECEXP (v, 0, i);
6825 for (i -= 2 ; i >= 0; i -= 2)
6826 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6827 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
/* Otherwise an element is compared against X by pointer identity (the
   enclosing loop is on an elided line).  */
6832 if (XVECEXP (v, 0, i) != x)
6837 /* Determine if V is a constant vector matching MODE with only one element
6838 that is not a sign extension. Two byte-sized elements count as one. */
/* NOTE(review): the byte-size test below reads GET_MODE_UNIT_SIZE (mode)
   while UNIT_SIZE is taken from GET_MODE (v); the two differ when MODE is
   VOIDmode, which the mode check accepts -- confirm.  */
6840 sh_1el_vec (v, mode)
6842 enum machine_mode mode;
6845 int i, last, least, sign_ix;
6848 if (GET_CODE (v) != CONST_VECTOR
6849 || (GET_MODE (v) != mode && mode != VOIDmode))
6851 /* Determine numbers of last and of least significant elements. */
6852 last = XVECLEN (v, 0) - 1;
6853 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6854 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6857 if (GET_MODE_UNIT_SIZE (mode) == 1)
6858 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6859 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6861 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
/* SIGN is the rtx the remaining elements are compared with: constm1_rtx
   when the shifted value of element SIGN_IX is nonzero, const0_rtx
   otherwise.  */
6862 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6863 ? constm1_rtx : const0_rtx);
6864 i = XVECLEN (v, 0) - 1;
/* Elements other than LEAST and SIGN_IX are compared with SIGN by pointer
   identity (the loop construct is on an elided line).  */
6866 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
/* Check that V is a CONST_VECTOR whose mode is MODE (any mode when MODE
   is VOIDmode), then test elements of V, starting from the last one, for
   being CONST_INTs.  The loop construct and the return statements are on
   lines not visible in this excerpt.  */
6873 sh_const_vec (v, mode)
6875 enum machine_mode mode;
6879 if (GET_CODE (v) != CONST_VECTOR
6880 || (GET_MODE (v) != mode && mode != VOIDmode))
6882 i = XVECLEN (v, 0) - 1;
6884 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6889 /* Return the destination address of a branch. */
6892 branch_dest (branch)
6895 rtx dest = SET_SRC (PATTERN (branch));
6898 if (GET_CODE (dest) == IF_THEN_ELSE)
6899 dest = XEXP (dest, 1);
6900 dest = XEXP (dest, 0);
6901 dest_uid = INSN_UID (dest);
6902 return INSN_ADDRESSES (dest_uid);
6905 /* Return nonzero if REG is not used after INSN.
6906 We assume REG is a reload reg, and therefore does
6907 not live past labels. It may live past calls or jumps though. */
6909 reg_unused_after (reg, insn)
6916 /* If the reg is set by this instruction, then it is safe for our
6917 case. Disregard the case where this is a store to memory, since
6918 we are checking a register used in the store address. */
6919 set = single_set (insn);
6920 if (set && GET_CODE (SET_DEST (set)) != MEM
6921 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6924 while ((insn = NEXT_INSN (insn)))
6926 code = GET_CODE (insn);
6929 /* If this is a label that existed before reload, then the register
6930 is dead here. However, if this is a label added by reorg, then
6931 the register may still be live here. We can't tell the difference,
6932 so we just ignore labels completely. */
6933 if (code == CODE_LABEL)
6938 if (code == JUMP_INSN)
6941 /* If this is a sequence, we must handle them all at once.
6942 We could have for instance a call that sets the target register,
6943 and an insn in a delay slot that uses the register. In this case,
6944 we must return 0. */
6945 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6950 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6952 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6953 rtx set = single_set (this_insn);
6955 if (GET_CODE (this_insn) == CALL_INSN)
6957 else if (GET_CODE (this_insn) == JUMP_INSN)
6959 if (INSN_ANNULLED_BRANCH_P (this_insn))
6964 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6966 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6968 if (GET_CODE (SET_DEST (set)) != MEM)
6974 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6979 else if (code == JUMP_INSN)
6982 else if (GET_RTX_CLASS (code) == 'i')
6984 rtx set = single_set (insn);
6986 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6988 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6989 return GET_CODE (SET_DEST (set)) != MEM;
6990 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6994 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7002 static GTY(()) rtx fpscr_rtx;
7008 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7009 REG_USERVAR_P (fpscr_rtx) = 1;
7010 mark_user_reg (fpscr_rtx);
7012 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7013 mark_user_reg (fpscr_rtx);
7032 expand_sf_unop (fun, operands)
7033 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7036 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7040 expand_sf_binop (fun, operands)
7041 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7044 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7049 expand_df_unop (fun, operands)
7050 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7053 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7057 expand_df_binop (fun, operands)
7058 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7061 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7065 /* ??? gcc does flow analysis strictly after common subexpression
7066 elimination. As a result, common subexpression elimination fails
7067 when there are some intervening statements setting the same register.
7068 If we did nothing about this, this would hurt the precision switching
7069 for SH4 badly. There is some cse after reload, but it is unable to
7070 undo the extra register pressure from the unused instructions, and
7071 it cannot remove auto-increment loads.
7073 A C code example that shows this flow/cse weakness for (at least) SH
7074 and sparc (as of gcc ss-970706) is this:
7088 So we add another pass before common subexpression elimination, to
7089 remove assignments that are dead due to a following assignment in the
7090 same basic block. */
7093 mark_use (x, reg_set_block)
7094 rtx x, *reg_set_block;
7100 code = GET_CODE (x);
7105 int regno = REGNO (x);
7106 int nregs = (regno < FIRST_PSEUDO_REGISTER
7107 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7111 reg_set_block[regno + nregs - 1] = 0;
7118 rtx dest = SET_DEST (x);
7120 if (GET_CODE (dest) == SUBREG)
7121 dest = SUBREG_REG (dest);
7122 if (GET_CODE (dest) != REG)
7123 mark_use (dest, reg_set_block);
7124 mark_use (SET_SRC (x), reg_set_block);
7131 const char *fmt = GET_RTX_FORMAT (code);
7133 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7136 mark_use (XEXP (x, i), reg_set_block);
7137 else if (fmt[i] == 'E')
7138 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7139 mark_use (XVECEXP (x, i, j), reg_set_block);
7146 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7148 /* This function returns a register to use to load the address to load
7149 the fpscr from. Currently it always returns r1 or r7, but when we are
7150 able to use pseudo registers after combine, or have a better mechanism
7151 for choosing a register, it should be done here. */
7152 /* REGS_LIVE is the liveness information for the point for which we
7153 need this allocation. In some bare-bones exit blocks, r1 is live at the
7154 start. We can even have all of r0..r3 being live:
7155 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7156 INSN before which new insns are placed with will clobber the register
7157 we return. If a basic block consists only of setting the return value
7158 register to a pseudo and using that register, the return value is not
7159 live before or after this block, yet we'll insert our insns right in
7163 get_free_reg (regs_live)
7164 HARD_REG_SET regs_live;
7166 if (! TEST_HARD_REG_BIT (regs_live, 1))
7167 return gen_rtx_REG (Pmode, 1);
7169 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7170 there shouldn't be anything but a jump before the function end. */
7171 if (! TEST_HARD_REG_BIT (regs_live, 7))
7172 return gen_rtx_REG (Pmode, 7);
7177 /* This function will set the fpscr from memory.
7178 MODE is the mode we are setting it to. */
/* REGS_LIVE is handed to get_free_reg, and the register that call returns
   is the operand of the insn emitted here.  */
7180 fpscr_set_from_mem (mode, regs_live)
7182 HARD_REG_SET regs_live;
7184 enum attr_fp_mode fp_mode = mode;
7185 rtx addr_reg = get_free_reg (regs_live);
/* gen_fpu_switch1 is emitted when MODE equals
   ACTUAL_NORMAL_MODE (FP_MODE); gen_fpu_switch0 is presumably the `else'
   arm (that keyword is on an elided line -- confirm).  */
7187 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7188 emit_insn (gen_fpu_switch1 (addr_reg));
7190 emit_insn (gen_fpu_switch0 (addr_reg));
7193 /* Is the given character a logical line separator for the assembler? */
7194 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7195 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7199 sh_insn_length_adjustment (insn)
7202 /* Instructions with unfilled delay slots take up an extra two bytes for
7203 the nop in the delay slot. */
7204 if (((GET_CODE (insn) == INSN
7205 && GET_CODE (PATTERN (insn)) != USE
7206 && GET_CODE (PATTERN (insn)) != CLOBBER)
7207 || GET_CODE (insn) == CALL_INSN
7208 || (GET_CODE (insn) == JUMP_INSN
7209 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7210 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7211 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7212 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7215 /* SH2e has a bug that prevents the use of annulled branches, so if
7216 the delay slot is not filled, we'll have to put a NOP in it. */
7217 if (sh_cpu == CPU_SH2E
7218 && GET_CODE (insn) == JUMP_INSN
7219 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7220 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7221 && get_attr_type (insn) == TYPE_CBRANCH
7222 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7225 /* sh-dsp parallel processing insns take four bytes instead of two. */
7227 if (GET_CODE (insn) == INSN)
7230 rtx body = PATTERN (insn);
7231 const char *template;
7233 int maybe_label = 1;
7235 if (GET_CODE (body) == ASM_INPUT)
7236 template = XSTR (body, 0);
7237 else if (asm_noperands (body) >= 0)
7239 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7248 while (c == ' ' || c == '\t');
7249 /* All sh-dsp parallel-processing insns start with p.
7250 The only non-ppi sh insn starting with p is pref.
7251 The only ppi starting with pr is prnd. */
7252 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7254 /* The repeat pseudo-insn expands to three insns, a total of
7255 six bytes in size. */
7256 else if ((c == 'r' || c == 'R')
7257 && ! strncasecmp ("epeat", template, 5))
7259 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7261 /* If this is a label, it is obviously not a ppi insn. */
7262 if (c == ':' && maybe_label)
7267 else if (c == '\'' || c == '"')
7272 maybe_label = c != ':';
7280 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7281 isn't protected by a PIC unspec. */
7283 nonpic_symbol_mentioned_p (x)
7286 register const char *fmt;
7289 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7290 || GET_CODE (x) == PC)
7293 /* We don't want to look into the possible MEM location of a
7294 CONST_DOUBLE, since we're not going to use it, in general. */
7295 if (GET_CODE (x) == CONST_DOUBLE)
7298 if (GET_CODE (x) == UNSPEC
7299 && (XINT (x, 1) == UNSPEC_PIC
7300 || XINT (x, 1) == UNSPEC_GOT
7301 || XINT (x, 1) == UNSPEC_GOTOFF
7302 || XINT (x, 1) == UNSPEC_GOTPLT
7303 || XINT (x, 1) == UNSPEC_GOTTPOFF
7304 || XINT (x, 1) == UNSPEC_DTPOFF
7305 || XINT (x, 1) == UNSPEC_PLT))
7308 fmt = GET_RTX_FORMAT (GET_CODE (x));
7309 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7315 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7316 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7319 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7326 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7327 @GOTOFF in `reg'. */
7329 legitimize_pic_address (orig, mode, reg)
7331 enum machine_mode mode ATTRIBUTE_UNUSED;
7334 if (tls_symbolic_operand (orig, Pmode))
7337 if (GET_CODE (orig) == LABEL_REF
7338 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7341 reg = gen_reg_rtx (Pmode);
7343 emit_insn (gen_symGOTOFF2reg (reg, orig));
7346 else if (GET_CODE (orig) == SYMBOL_REF)
7349 reg = gen_reg_rtx (Pmode);
7351 emit_insn (gen_symGOT2reg (reg, orig));
7357 /* Mark the use of a constant in the literal table. If the constant
7358 has multiple labels, make it unique. */
7360 mark_constant_pool_use (x)
7363 rtx insn, lab, pattern;
7368 switch (GET_CODE (x))
7378 /* Get the first label in the list of labels for the same constant
7379 and delete the other labels in the list. */
7381 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7383 if (GET_CODE (insn) != CODE_LABEL
7384 || LABEL_REFS (insn) != NEXT_INSN (insn))
7389 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7390 INSN_DELETED_P (insn) = 1;
7392 /* Mark constants in a window. */
7393 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7395 if (GET_CODE (insn) != INSN)
7398 pattern = PATTERN (insn);
7399 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7402 switch (XINT (pattern, 1))
7404 case UNSPECV_CONST2:
7405 case UNSPECV_CONST4:
7406 case UNSPECV_CONST8:
7407 XVECEXP (pattern, 0, 1) = const1_rtx;
7409 case UNSPECV_WINDOW_END:
7410 if (XVECEXP (pattern, 0, 0) == x)
7413 case UNSPECV_CONST_END:
7423 /* Return true if it's possible to redirect BRANCH1 to the destination
7424 of an unconditional jump BRANCH2. We only want to do this if the
7425 resulting branch will have a short displacement. */
7427 sh_can_redirect_branch (branch1, branch2)
7431 if (flag_expensive_optimizations && simplejump_p (branch2))
7433 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7437 for (distance = 0, insn = NEXT_INSN (branch1);
7438 insn && distance < 256;
7439 insn = PREV_INSN (insn))
7444 distance += get_attr_length (insn);
7446 for (distance = 0, insn = NEXT_INSN (branch1);
7447 insn && distance < 256;
7448 insn = NEXT_INSN (insn))
7453 distance += get_attr_length (insn);
7459 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7461 sh_hard_regno_rename_ok (old_reg, new_reg)
7462 unsigned int old_reg ATTRIBUTE_UNUSED;
7463 unsigned int new_reg;
7466 /* Interrupt functions can only use registers that have already been
7467 saved by the prologue, even if they would normally be
7470 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7476 /* Function to update the integer COST
7477 based on the relationship between INSN that is dependent on
7478 DEP_INSN through the dependence LINK. The default is to make no
7479 adjustment to COST. This can be used for example to specify to
7480 the scheduler that an output- or anti-dependence does not incur
7481 the same cost as a data-dependence. The return value should be
7482 the new value for COST. */
7484 sh_adjust_cost (insn, link, dep_insn, cost)
7486 rtx link ATTRIBUTE_UNUSED;
7494 /* On SHmedia, if the dependence is an anti-dependence or
7495 output-dependence, there is no cost. */
7496 if (REG_NOTE_KIND (link) != 0)
7499 if (get_attr_is_mac_media (insn)
7500 && get_attr_is_mac_media (dep_insn))
7503 else if (REG_NOTE_KIND (link) == 0)
7505 enum attr_type dep_type, type;
7507 if (recog_memoized (insn) < 0
7508 || recog_memoized (dep_insn) < 0)
7511 dep_type = get_attr_type (dep_insn);
7512 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7514 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7515 && (type = get_attr_type (insn)) != TYPE_CALL
7516 && type != TYPE_SFUNC)
7519 /* The only input for a call that is timing-critical is the
7520 function's address. */
7521 if (GET_CODE(insn) == CALL_INSN)
7523 rtx call = PATTERN (insn);
7525 if (GET_CODE (call) == PARALLEL)
7526 call = XVECEXP (call, 0 ,0);
7527 if (GET_CODE (call) == SET)
7528 call = SET_SRC (call);
7529 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7530 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7533 /* Likewise, the most timing critical input for an sfunc call
7534 is the function address. However, sfuncs typically start
7535 using their arguments pretty quickly.
7536 Assume a four cycle delay before they are needed. */
7537 /* All sfunc calls are parallels with at least four components.
7538 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7539 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7540 && XVECLEN (PATTERN (insn), 0) >= 4
7541 && (reg = sfunc_uses_reg (insn)))
7543 if (! reg_set_p (reg, dep_insn))
7546 /* When the preceding instruction loads the shift amount of
7547 the following SHAD/SHLD, the latency of the load is increased
7550 && get_attr_type (insn) == TYPE_DYN_SHIFT
7551 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7552 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7553 XEXP (SET_SRC (single_set(insn)),
7556 /* When an LS group instruction with a latency of less than
7557 3 cycles is followed by a double-precision floating-point
7558 instruction, FIPR, or FTRV, the latency of the first
7559 instruction is increased to 3 cycles. */
7561 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7562 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7564 /* The lsw register of a double-precision computation is ready one
7566 else if (reload_completed
7567 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7568 && (use_pat = single_set (insn))
7569 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7573 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7574 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7577 /* An anti-dependence penalty of two applies if the first insn is a double
7578 precision fadd / fsub / fmul. */
7579 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7580 && recog_memoized (dep_insn) >= 0
7581 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7582 /* A lot of alleged anti-flow dependences are fake,
7583 so check this one is real. */
7584 && flow_dependent_p (dep_insn, insn))
7591 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7592 if DEP_INSN is anti-flow dependent on INSN. */
7594 flow_dependent_p (insn, dep_insn)
/* TMP starts out as INSN's pattern and is passed by address to
   flow_dependent_p_1, which note_stores invokes on the pattern of
   DEP_INSN.  */
7597 rtx tmp = PATTERN (insn);
7599 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
/* A dependence is reported when TMP has become NULL_RTX.  */
7600 return tmp == NULL_RTX;
7603 /* A helper function for flow_dependent_p called through note_stores. */
/* DATA points to an rtx.  While that rtx is non-null, X is tested against
   it with reg_referenced_p.  PAT is unused.  The action taken on a hit is
   on a line not visible in this excerpt; flow_dependent_p checks for the
   rtx having become NULL_RTX, so presumably it is cleared -- confirm.  */
7605 flow_dependent_p_1 (x, pat, data)
7607 rtx pat ATTRIBUTE_UNUSED;
7610 rtx * pinsn = (rtx *) data;
7612 if (*pinsn && reg_referenced_p (x, *pinsn))
7616 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7617 'special function' patterns (type sfunc) that clobber pr, but that
7618 do not look like function calls to leaf_function_p. Hence we must
7619 do this extra check. */
7623 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7626 /* This function returns nonzero if the DFA based scheduler interface
7627 is to be used. At present this is supported for the SH4 only. */
7629 sh_use_dfa_interface()
7631 if (TARGET_HARD_SH4)
7637 /* This function returns "2" to indicate dual issue for the SH4
7638 processor. To be used by the DFA pipeline description. */
7642 if (TARGET_SUPERSCALAR)
7648 /* SHmedia requires registers for branches, so we can't generate new
7649 branches past reload. */
7651 sh_cannot_modify_jumps_p ()
7653 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7657 sh_ms_bitfield_layout_p (record_type)
7658 tree record_type ATTRIBUTE_UNUSED;
7664 On the SH1..SH4, the trampoline looks like
7665 2 0002 D202 mov.l l2,r2
7666 1 0000 D301 mov.l l1,r3
7669 5 0008 00000000 l1: .long area
7670 6 000c 00000000 l2: .long function
7672 SH5 (compact) uses r1 instead of r3 for the static chain. */
7675 /* Emit RTL insns to initialize the variable parts of a trampoline.
7676 FNADDR is an RTX for the address of the function's pure code.
7677 CXT is an RTX for the static chain value for the function. */
7680 sh_initialize_trampoline (tramp, fnaddr, cxt)
7681 rtx tramp, fnaddr, cxt;
7683 if (TARGET_SHMEDIA64)
7688 rtx movi1 = GEN_INT (0xcc000010);
7689 rtx shori1 = GEN_INT (0xc8000010);
7692 /* The following trampoline works within a +- 128 KB range for cxt:
7693 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7694 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7695 gettr tr1,r1; blink tr0,r63 */
7696 /* Address rounding makes it hard to compute the exact bounds of the
7697 offset for this trampoline, but we have a rather generous offset
7698 range, so frame_offset should do fine as an upper bound. */
7699 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7701 /* ??? could optimize this trampoline initialization
7702 by writing DImode words with two insns each. */
7703 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7704 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7705 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7706 insn = gen_rtx_AND (DImode, insn, mask);
7707 /* Or in ptb/u .,tr1 pattern */
7708 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7709 insn = force_operand (insn, NULL_RTX);
7710 insn = gen_lowpart (SImode, insn);
7711 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7712 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7713 insn = gen_rtx_AND (DImode, insn, mask);
7714 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7715 insn = gen_lowpart (SImode, insn);
7716 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7717 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7718 insn = gen_rtx_AND (DImode, insn, mask);
7719 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7720 insn = gen_lowpart (SImode, insn);
7721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7722 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7723 insn = gen_rtx_AND (DImode, insn, mask);
7724 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7725 insn = gen_lowpart (SImode, insn);
7726 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7728 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7729 insn = gen_rtx_AND (DImode, insn, mask);
7730 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7731 insn = gen_lowpart (SImode, insn);
7732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7734 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7735 GEN_INT (0x6bf10600));
7736 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7737 GEN_INT (0x4415fc10));
7738 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7739 GEN_INT (0x4401fff0));
7740 emit_insn (gen_ic_invalidate_line (tramp));
7743 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7744 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7746 tramp_templ = gen_datalabel_ref (tramp_templ);
7747 dst = gen_rtx_MEM (BLKmode, tramp);
7748 src = gen_rtx_MEM (BLKmode, tramp_templ);
7749 set_mem_align (dst, 256);
7750 set_mem_align (src, 64);
7751 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7753 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7755 emit_move_insn (gen_rtx_MEM (Pmode,
7756 plus_constant (tramp,
7758 + GET_MODE_SIZE (Pmode))),
7760 emit_insn (gen_ic_invalidate_line (tramp));
7763 else if (TARGET_SHMEDIA)
7765 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7766 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7767 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7768 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7769 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7770 rotated 10 right, and higher 16 bit of every 32 selected. */
7772 = force_reg (V2HImode, (simplify_gen_subreg
7773 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7774 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7775 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7777 tramp = force_reg (Pmode, tramp);
7778 fnaddr = force_reg (SImode, fnaddr);
7779 cxt = force_reg (SImode, cxt);
7780 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7781 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7783 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7784 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7785 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7786 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7787 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7788 gen_rtx_SUBREG (V2HImode, cxt, 0),
7790 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7791 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7792 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7793 if (TARGET_LITTLE_ENDIAN)
7795 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7796 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7800 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7801 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7803 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7804 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7805 emit_insn (gen_ic_invalidate_line (tramp));
7808 else if (TARGET_SHCOMPACT)
7810 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7813 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7814 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7817 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7821 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7825 if (TARGET_USERMODE)
7826 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
7827 0, VOIDmode, 1, tramp, SImode);
7829 emit_insn (gen_ic_invalidate_line (tramp));
7833 /* FIXME: This is overly conservative. A SHcompact function that
7834 receives arguments ``by reference'' will have them stored in its
7835 own stack frame, so it must not pass pointers or references to
7836 these arguments to other functions by means of sibling calls. */
/* Of the conditions combined below, the visible ones require that, on
   SHcompact, current_function_args_info.stack_regs be zero, and that the
   current function not be an interrupt handler.  The leading condition
   (and the use of DECL) is on a line not visible in this excerpt.  EXP is
   unused.  */
7838 sh_function_ok_for_sibcall (decl, exp)
7840 tree exp ATTRIBUTE_UNUSED;
7843 && (! TARGET_SHCOMPACT
7844 || current_function_args_info.stack_regs == 0)
7845 && ! sh_cfun_interrupt_handler_p ());
7848 /* Machine specific built-in functions. */
7850 struct builtin_description
7852 const enum insn_code icode;
7853 const char *const name;
7857 /* Describe the number and signedness of arguments; arg[0] == result
7858 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
7859 static const char signature_args[][4] =
7861 #define SH_BLTIN_V2SI2 0
7863 #define SH_BLTIN_V4HI2 1
7865 #define SH_BLTIN_V2SI3 2
7867 #define SH_BLTIN_V4HI3 3
7869 #define SH_BLTIN_V8QI3 4
7871 #define SH_BLTIN_MAC_HISI 5
7873 #define SH_BLTIN_SH_HI 6
7875 #define SH_BLTIN_SH_SI 7
7877 #define SH_BLTIN_V4HI2V2SI 8
7879 #define SH_BLTIN_V4HI2V8QI 9
7881 #define SH_BLTIN_SISF 10
7883 #define SH_BLTIN_LDUA_L 11
7885 #define SH_BLTIN_LDUA_Q 12
7887 #define SH_BLTIN_STUA_L 13
7889 #define SH_BLTIN_STUA_Q 14
7891 #define SH_BLTIN_UDI 15
7893 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7894 #define SH_BLTIN_2 16
7895 #define SH_BLTIN_SU 16
7897 #define SH_BLTIN_3 17
7898 #define SH_BLTIN_SUS 17
7900 #define SH_BLTIN_PSSV 18
7902 #define SH_BLTIN_XXUU 19
7903 #define SH_BLTIN_UUUU 19
7905 #define SH_BLTIN_PV 20
7908 /* mcmv: operands considered unsigned. */
7909 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7910 /* mperm: control value considered unsigned int. */
7911 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7912 /* mshards_q: returns signed short. */
7913 /* nsb: takes long long arg, returns unsigned char. */
7914 static const struct builtin_description bdesc[] =
7916 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7917 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7918 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7919 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7920 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7921 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7922 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7924 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7925 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7927 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7928 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7929 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7930 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7931 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7932 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7933 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7934 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7935 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7936 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7937 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7938 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7939 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7940 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7941 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7942 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7943 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7944 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7945 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7946 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
7947 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7948 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7949 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7950 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7951 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7952 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7953 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7954 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7955 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7956 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7957 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7958 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7959 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7960 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7961 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7962 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7963 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7964 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7965 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7966 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7967 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7968 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7969 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7970 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7971 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7972 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7973 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7974 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7975 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7976 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7977 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7978 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7979 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7980 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7982 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7983 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7984 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7985 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7986 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7987 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7988 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7989 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7990 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7991 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7992 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7993 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7994 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7995 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7996 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7997 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7999 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
8000 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
8002 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8003 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
/* Register one builtin per entry of the bdesc table.  Each entry is declared
   through builtin_function as BUILT_IN_MD, with its index in bdesc
   (d - bdesc) as the function code.  Function types for signatures numbered
   below SH_BLTIN_NUM_SHARED_SIGNATURES are built once and cached in shared[].
   NOTE(review): several short lines (braces, loop headers, the statements
   controlled by some conditions) are not visible in this copy, so the
   comments below describe only what the visible lines do.  */
8008 sh_media_init_builtins ()
8010 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8011 const struct builtin_description *d;
/* No function type has been cached yet.  */
8013 memset (shared, 0, sizeof shared);
/* Walk every element of bdesc.  */
8014 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8016 tree type, arg_type;
8017 int signature = d->signature;
/* Reuse a previously built type for a shareable signature.  */
8020 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8021 type = shared[signature];
/* Slot 0 of the signature describes the result.  When it is nonzero,
   argument operands of the insn are numbered from 1 instead of 0
   (see opno below).  */
8024 int has_result = signature_args[signature][0] != 0;
/* For signatures whose first argument code is 8, compare the mode of the
   first argument operand with Pmode.  NOTE(review): the statement this
   condition controls is not visible here; code 8 presumably marks a
   pointer argument -- confirm against signature_args.  */
8026 if (signature_args[signature][1] == 8
8027 && (insn_data[d->icode].operand[has_result].mode != Pmode))
/* Test for a floating-point mode on operand 0 when TARGET_FPU_ANY is false.
   NOTE(review): the controlled statement is likewise not visible here.  */
8029 if (! TARGET_FPU_ANY
8030 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
/* The argument list starts as void_list_node; argument types are consed
   onto its front further down.  */
8032 type = void_list_node;
8035 int arg = signature_args[signature][i];
/* Insn operand that corresponds to signature slot i.  */
8036 int opno = i - 1 + has_result;
8039 arg_type = ptr_type_node;
/* Ask the front end for a type matching the operand's machine mode.  */
8041 arg_type = ((*lang_hooks.types.type_for_mode)
8042 (insn_data[d->icode].operand[opno].mode,
8047 arg_type = void_type_node;
8050 type = tree_cons (NULL_TREE, arg_type, type);
/* arg_type as left by the loop is handed to build_function_type together
   with the accumulated argument list.  */
8052 type = build_function_type (arg_type, type);
/* Remember the type so later entries with the same signature reuse it.  */
8053 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8054 shared[signature] = type;
8056 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
/* Run the SHmedia builtin registration.  NOTE(review): the header of the
   enclosing function and any condition guarding this call are not visible
   in this copy.  */
8065 sh_media_init_builtins ();
8068 /* Expand an expression EXP that calls a built-in function,
8069 with result going to TARGET if that's convenient
8070 (and in mode MODE if that's convenient).
8071 SUBTARGET may be used as the target for computing one of EXP's operands.
8072 IGNORE is nonzero if the value is to be ignored. */
/* The function code of the called decl is used directly as an index into
   bdesc; the entry found there supplies the insn code and the signature
   that drive the expansion.  NOTE(review): several short lines (braces,
   declarations of i/nop/op/pat/arg, the dispatch on the operand count and
   the emit/return statements) are not visible in this copy.  */
8075 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8078 rtx subtarget ATTRIBUTE_UNUSED;
8079 enum machine_mode mode ATTRIBUTE_UNUSED;
8082 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8083 tree arglist = TREE_OPERAND (exp, 1);
8084 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8085 const struct builtin_description *d = &bdesc[fcode];
8086 enum insn_code icode = d->icode;
8087 int signature = d->signature;
8088 enum machine_mode tmode = VOIDmode;
/* A nonzero slot 0 means the builtin produces a result in operand 0.  */
8093 if (signature_args[signature][0])
8098 tmode = insn_data[icode].operand[0].mode;
/* Among the visible conditions: a fresh pseudo of mode tmode replaces
   TARGET when TARGET has a different mode or is rejected by operand 0's
   predicate.  */
8100 || GET_MODE (target) != tmode
8101 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8102 target = gen_reg_rtx (tmode);
/* Signature slots 1..3 describe the arguments; nop advances in step with i
   and indexes the insn operands.  */
8108 for (i = 1; i <= 3; i++, nop++)
8111 enum machine_mode opmode, argmode;
8113 if (! signature_args[signature][i])
/* Take the next argument off the list.  */
8115 arg = TREE_VALUE (arglist);
8116 if (arg == error_mark_node)
8118 arglist = TREE_CHAIN (arglist);
8119 opmode = insn_data[icode].operand[nop].mode;
8120 argmode = TYPE_MODE (TREE_TYPE (arg));
/* When the argument's mode differs from the operand's, wrap it in a
   NOP_EXPR to the front end's type for opmode before expanding.  */
8121 if (argmode != opmode)
8122 arg = build1 (NOP_EXPR,
8123 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8124 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
/* Copy into a register of opmode if the operand predicate rejects the
   expanded rtx.  */
8125 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8126 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* The insn's generator function is called with one to four operands.
   NOTE(review): the code selecting among these four calls is not visible
   here.  */
8132 pat = (*insn_data[d->icode].genfun) (op[0]);
8135 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8138 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8141 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Emit two insns generated by gen_unary_sf_op for the unary operation CODE.
   Both receive OP0, OP1 and an SFmode rtx of code CODE built over OP1; the
   first passes const0_rtx for both selector operands, the second
   const1_rtx.  Presumably this handles the two SFmode halves of a V2SF
   value one after the other -- inferred from the function name, confirm
   against the unary_sf_op pattern.  */
8153 sh_expand_unop_v2sf (code, op0, op1)
8157 rtx sel0 = const0_rtx;
8158 rtx sel1 = const1_rtx;
8159 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
/* The operation itself, expressed as an SFmode rtx.  */
8160 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8162 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8163 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Binary counterpart of the V2SF unary expander: emit two insns produced
   through the generator pointer fn for the binary operation CODE applied to
   OP1 and OP2, with OP0 passed first.  NOTE(review): the initializer of fn
   is not visible in this copy.  */
8167 sh_expand_binop_v2sf (code, op0, op1, op2)
8171 rtx sel0 = const0_rtx;
8172 rtx sel1 = const1_rtx;
8173 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
/* The operation itself, expressed as an SFmode rtx.  */
8175 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
/* The first three selector operands are equal within each call, while the
   fourth is the opposite constant (sel1 in the first call, sel0 in the
   second).  */
8177 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8178 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8181 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
/* Decide, for register class CLASS, about a mode change from FROM to TO.
   The visible tests apply only when the two modes differ in size.
   NOTE(review): the else branch and the final return are not visible in
   this copy.  */
8184 sh_cannot_change_mode_class (from, to, class)
8185 enum machine_mode from, to;
8186 enum reg_class class;
8188 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8190 if (TARGET_LITTLE_ENDIAN)
/* Little endian: if either mode is narrower than 8 bytes, the result is
   whether CLASS overlaps DF_REGS.  */
8192 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8193 return reg_classes_intersect_p (DF_REGS, class);
/* Here only FROM is tested for being narrower than 8 bytes, and the overlap
   is checked against DF_HI_REGS (presumably the big-endian case -- confirm).  */
8197 if (GET_MODE_SIZE (from) < 8)
8198 return reg_classes_intersect_p (DF_HI_REGS, class);
8205 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8206 that label is used. */
/* Add NUSES to LABEL_NUSES of the CODE_LABEL that ADDRESS refers to, after
   unwrapping a GOTOFF-style address first.  Nothing is counted when ADDRESS
   does not end up being a LABEL_REF to a CODE_LABEL.  */
8209 sh_mark_label (address, nuses)
8213 if (GOTOFF_P (address))
8215 /* Extract the label or symbol. */
8216 address = XEXP (address, 0);
/* Step past an enclosing PLUS, then take element 0 of the vector in
   operand 0.  */
8217 if (GET_CODE (address) == PLUS)
8218 address = XEXP (address, 0);
8219 address = XVECEXP (address, 0, 0);
/* Only a LABEL_REF whose operand is a CODE_LABEL has its count adjusted.  */
8221 if (GET_CODE (address) == LABEL_REF
8222 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8223 LABEL_NUSES (XEXP (address, 0)) += nuses;
8226 /* Compute extra cost of moving data between one register class and another. */
8229 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8230 uses this information. Hence, the general register <-> floating point
8231 register information here is not used for SFmode. */
/* Cost of moving a value of mode MODE from a register in SRCCLASS to one in
   DSTCLASS.  The function is a chain of class-pair tests, each followed by
   a cost.  NOTE(review): for most of the tests the return statement is not
   visible in this copy; only the costs that can be read below are
   described.  */
8234 sh_register_move_cost (mode, srcclass, dstclass)
8235 enum machine_mode mode;
8236 enum reg_class srcclass, dstclass;
/* Moves into T_REGS or PR_REGS.  */
8238 if (dstclass == T_REGS || dstclass == PR_REGS)
/* SImode moves between two FP-capable classes, not on SHmedia, with
   TARGET_FMOVD set.  */
8241 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8242 && REGCLASS_HAS_FP_REG (srcclass)
8243 && REGCLASS_HAS_FP_REG (dstclass))
/* Moves between an FP-capable class and a class with general registers, in
   either direction: 4 on SHmedia, else 8 with TARGET_FMOVD, else 12, for
   every started 8 bytes of MODE.  */
8246 if ((REGCLASS_HAS_FP_REG (dstclass)
8247 && REGCLASS_HAS_GENERAL_REG (srcclass))
8248 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8249 && REGCLASS_HAS_FP_REG (srcclass)))
8250 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8251 * ((GET_MODE_SIZE (mode) + 7) / 8U));
/* Moves between FPUL_REGS and a class with general registers.  */
8253 if ((dstclass == FPUL_REGS
8254 && REGCLASS_HAS_GENERAL_REG (srcclass))
8255 || (srcclass == FPUL_REGS
8256 && REGCLASS_HAS_GENERAL_REG (dstclass)))
/* Moves between FPUL_REGS and PR_REGS / MAC_REGS (and from T_REGS into
   FPUL_REGS).  */
8259 if ((dstclass == FPUL_REGS
8260 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8261 || (srcclass == FPUL_REGS
8262 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
/* TARGET_REGS on one side and no general register on the other.  */
8265 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8266 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
/* FPSCR_REGS on one side and no general register on the other.  */
8269 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8270 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
/* NOTE(review): the start of this condition is not visible here.  When it
   holds and neither class has general registers, the cost is 2 for every
   started 8 bytes of MODE.  */
8275 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8276 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8277 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Everything else: 2 for every started 4 bytes of MODE.  */
8279 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8282 /* Like register_operand, but take into account that SHMEDIA can use
8283 the constant zero like a general register. */
/* Operand predicate: apart from the SHmedia constant-zero case tested
   first, the answer is whatever register_operand gives for OP in MODE.
   NOTE(review): the statement controlled by the first test is not visible
   in this copy.  */
8285 sh_register_operand (op, mode)
8287 enum machine_mode mode;
/* OP is the zero constant of MODE and the target is SHmedia.  */
8289 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8291 return register_operand (op, mode);
/* Emit a move into REG of the ptr_mode value stored in memory at ADDR,
   wrapping the memory reference in a SIGN_EXTEND to Pmode when Pmode and
   ptr_mode differ.  Returns the result of emit_move_insn.  */
8294 static rtx emit_load_ptr PARAMS ((rtx, rtx));
8297 emit_load_ptr (reg, addr)
8300 rtx mem = gen_rtx_MEM (ptr_mode, addr);
8302 if (Pmode != ptr_mode)
8303 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8304 return emit_move_insn (reg, mem);
/* Generate RTL for a thunk and output it to FILE: locate the incoming
   "this" argument, add DELTA to it, add a value loaded at VCALL_OFFSET from
   the pointer that "this" points to, and tail-call FUNCTION.  The insns are
   then run through a minimal back-end pipeline and printed with final.
   NOTE(review): many short lines (braces, else keywords, some conditions
   and declarations, and the end of the function) are not visible in this
   copy; in particular the guard around the VCALL_OFFSET part cannot be
   seen.  The comments below describe only the visible statements.  */
8308 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8310 tree thunk_fndecl ATTRIBUTE_UNUSED;
8311 HOST_WIDE_INT delta;
8312 HOST_WIDE_INT vcall_offset;
8315 CUMULATIVE_ARGS cum;
8316 int structure_value_byref = 0;
8317 rtx this, this_value, sibcall, insns, funexp;
8318 tree funtype = TREE_TYPE (function);
/* Whether DELTA satisfies the add-immediate constraint (CONST_OK_FOR_J on
   SHmedia, CONST_OK_FOR_I otherwise).  The variable being initialized is
   declared on a line not visible here -- presumably simple_add, which is
   used below.  */
8320 = (TARGET_SHMEDIA ? CONST_OK_FOR_J (delta) : CONST_OK_FOR_I (delta));
8322 rtx scratch0, scratch1, scratch2;
/* Set while the thunk's insns are generated and output; cleared again at
   the end of the visible code.  */
8324 reload_completed = 1;
8326 current_function_uses_only_leaf_regs = 1;
8328 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8330 /* Find the "this" pointer. We have such a wide range of ABIs for the
8331 SH that it's best to do this completely machine independently.
8332 "this" is passed as first argument, unless a structure return pointer
8333 comes first, in which case "this" comes second. */
8334 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8335 #ifndef PCC_STATIC_STRUCT_RETURN
8336 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8337 structure_value_byref = 1;
8338 #endif /* not PCC_STATIC_STRUCT_RETURN */
/* An aggregate return value with no dedicated struct_value_rtx: advance
   past one pointer argument before looking up "this".  */
8339 if (structure_value_byref && struct_value_rtx == 0)
8341 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8343 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8345 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8347 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8348 static chain pointer (even if you can't have nested virtual functions
8349 right now, someone might implement them sometime), and the rest of the
8350 registers are used for argument passing, are callee-saved, or reserved. */
8351 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
/* NOTE(review): the condition selecting the following r1/r3 assignment is
   not visible here.  */
8354 scratch1 = gen_rtx_REG (ptr_mode, 1);
8355 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8356 pointing where to return struct values. */
8357 scratch2 = gen_rtx_REG (Pmode, 3);
8359 else if (TARGET_SHMEDIA)
8361 scratch1 = gen_rtx_REG (ptr_mode, 21);
8362 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
/* Address expression for "this" adjusted by DELTA.  */
8365 this_value = plus_constant (this, delta);
/* If this + DELTA is itself a valid memory address, load through it
   directly into scratch0.  NOTE(review): the first part of this condition
   is not visible here.  */
8367 && (simple_add || scratch0 != scratch1)
8368 && strict_memory_address_p (ptr_mode, this_value))
8370 emit_load_ptr (scratch0, this_value);
/* Otherwise update "this" in place: directly when DELTA fits an immediate,
   else through scratch1.  */
8376 else if (simple_add)
8377 emit_move_insn (this, this_value);
8380 emit_move_insn (scratch1, GEN_INT (delta));
8381 emit_insn (gen_add2_insn (this, scratch1));
/* Load the pointer stored at "this" into scratch0, then form the address at
   VCALL_OFFSET from it.  */
8389 emit_load_ptr (scratch0, this);
8391 offset_addr = plus_constant (scratch0, vcall_offset);
8392 if (strict_memory_address_p (ptr_mode, offset_addr))
8394 else if (! TARGET_SH5)
8396 /* scratch0 != scratch1, and we have indexed loads. Get better
8397 schedule by loading the offset into r1 and using an indexed
8398 load - then the load of r1 can issue before the load from
8399 (this + delta) finishes. */
8400 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8401 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
/* VCALL_OFFSET fits an add immediate: add it to scratch0 in place.  */
8403 else if (TARGET_SHMEDIA
8404 ? CONST_OK_FOR_J (vcall_offset)
8405 : CONST_OK_FOR_I (vcall_offset))
8407 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8408 offset_addr = scratch0;
/* A distinct second scratch register is available: materialize the offset
   there and add it.  */
8410 else if (scratch0 != scratch1)
8412 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8413 emit_insn (gen_add2_insn (scratch0, scratch1));
8414 offset_addr = scratch0;
8417 abort (); /* FIXME */
/* Fetch the adjustment stored at offset_addr and add it to "this".  */
8418 emit_load_ptr (scratch0, offset_addr);
8420 if (Pmode != ptr_mode)
8421 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8422 emit_insn (gen_add2_insn (this, scratch0));
8425 /* Generate a tail call to the target function. */
8426 if (! TREE_USED (function))
8428 assemble_external (function);
8429 TREE_USED (function) = 1;
/* Put the callee's address in scratch2 and call through it as a sibling
   call, recording "this" as a register used by the call.  */
8431 funexp = XEXP (DECL_RTL (function), 0);
8432 emit_move_insn (scratch2, funexp);
8433 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8434 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8435 SIBLING_CALL_P (sibcall) = 1;
8436 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8439 /* Run just enough of rest_of_compilation to do scheduling and get
8440 the insns emitted. Note that use_thunk calls
8441 assemble_start_function and assemble_end_function. */
8442 insns = get_insns ();
/* Post-reload scheduling needs basic blocks and life information first.  */
8444 if (optimize > 0 && flag_schedule_insns_after_reload)
8447 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8448 life_analysis (insns, rtl_dump_file, PROP_FINAL);
8450 split_all_insns (1);
8452 schedule_insns (rtl_dump_file);
8457 if (optimize > 0 && flag_delayed_branch)
8458 dbr_schedule (insns, rtl_dump_file);
8459 shorten_branches (insns);
/* Print the assembly for the generated insns to FILE.  */
8460 final_start_function (insns, file, 1);
8461 final (insns, file, 1, 0);
8462 final_end_function ();
8464 if (optimize > 0 && flag_schedule_insns_after_reload)
8466 /* Release all memory allocated by flow. */
8467 free_basic_block_vars (0);
8469 /* Release all memory held by regsets now. */
8470 regset_release_memory ();
/* Undo the setting made at the start of the function.  */
8473 reload_completed = 0;