1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
54 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
55 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
57 /* These are some macros to abstract register modes. */
58 #define CONST_OK_FOR_ADD(size) \
59 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
60 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
61 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
62 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
64 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
65 int current_function_interrupt;
67 /* ??? The pragma interrupt support will not work for SH3. */
68 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
69 output code for the next function appropriate for an interrupt handler. */
72 /* This is set by the trap_exit attribute for functions. It specifies
73 a trap number to be used in a trapa instruction at function exit
74 (instead of an rte instruction). */
77 /* This is used by the sp_switch attribute for functions. It specifies
78 a variable holding the address of the stack the interrupt function
79 should switch to/from at entry/exit. */
82 /* This is set by #pragma trapa, and is similar to the above, except that
83 the compiler doesn't emit code to preserve all registers. */
84 static int pragma_trapa;
86 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
87 which has a separate set of low regs for User and Supervisor modes.
88 This should only be used for the lowest level of interrupts. Higher levels
89 of interrupts must save the registers in case they themselves are
91 int pragma_nosave_low_regs;
93 /* This is used for communication between SETUP_INCOMING_VARARGS and
94 sh_expand_prologue. */
95 int current_function_anonymous_args;
97 /* Global variables for machine-dependent things. */
99 /* Which cpu are we scheduling for. */
100 enum processor_type sh_cpu;
102 /* Saved operands from the last compare to use when we generate an scc
108 /* Provides the class number of the smallest class containing
111 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
113 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
150 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
154 char sh_register_names[FIRST_PSEUDO_REGISTER] \
155 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
157 char sh_additional_register_names[ADDREGNAMES_SIZE] \
158 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
159 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
161 /* Provide reg_class from a letter such as appears in the machine
162 description. *: target independently reserved letter.
163 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
165 enum reg_class reg_class_from_letter[] =
167 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
168 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
169 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
170 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
171 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
172 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
173 /* y */ FPUL_REGS, /* z */ R0_REGS
176 int assembler_dialect;
178 static bool shmedia_space_reserved_for_target_registers;
180 static void split_branches PARAMS ((rtx));
181 static int branch_dest PARAMS ((rtx));
182 static void force_into PARAMS ((rtx, rtx));
183 static void print_slot PARAMS ((rtx));
184 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
185 static void dump_table PARAMS ((rtx));
186 static int hi_const PARAMS ((rtx));
187 static int broken_move PARAMS ((rtx));
188 static int mova_p PARAMS ((rtx));
189 static rtx find_barrier PARAMS ((int, rtx, rtx));
190 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
191 static rtx gen_block_redirect PARAMS ((rtx, int, int));
192 static void sh_reorg PARAMS ((void));
193 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
194 static rtx frame_insn PARAMS ((rtx));
195 static rtx push PARAMS ((int));
196 static void pop PARAMS ((int));
197 static void push_regs PARAMS ((HARD_REG_SET *, int));
198 static int calc_live_regs PARAMS ((HARD_REG_SET *));
199 static void mark_use PARAMS ((rtx, rtx *));
200 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
201 static rtx mark_constant_pool_use PARAMS ((rtx));
202 const struct attribute_spec sh_attribute_table[];
203 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
204 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
205 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
206 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
207 static void sh_insert_attributes PARAMS ((tree, tree *));
208 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
209 static int sh_use_dfa_interface PARAMS ((void));
210 static int sh_issue_rate PARAMS ((void));
211 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
213 static bool sh_cannot_modify_jumps_p PARAMS ((void));
214 static int sh_target_reg_class (void);
215 static bool sh_optimize_target_register_callee_saved (bool);
216 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
218 static void sh_init_builtins PARAMS ((void));
219 static void sh_media_init_builtins PARAMS ((void));
220 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
221 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
222 HOST_WIDE_INT, tree));
223 static void sh_file_start PARAMS ((void));
224 static int flow_dependent_p PARAMS ((rtx, rtx));
225 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
226 static int shiftcosts PARAMS ((rtx));
227 static int andcosts PARAMS ((rtx));
228 static int addsubcosts PARAMS ((rtx));
229 static int multcosts PARAMS ((rtx));
230 static bool unspec_caller_rtx_p PARAMS ((rtx));
231 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
232 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
233 static int sh_address_cost PARAMS ((rtx));
234 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
235 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
236 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
238 /* Initialize the GCC target structure. */
239 #undef TARGET_ATTRIBUTE_TABLE
240 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
242 /* The next two are used for debug info when compiling with -gdwarf. */
243 #undef TARGET_ASM_UNALIGNED_HI_OP
244 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
245 #undef TARGET_ASM_UNALIGNED_SI_OP
246 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
248 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
249 #undef TARGET_ASM_UNALIGNED_DI_OP
250 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
251 #undef TARGET_ASM_ALIGNED_DI_OP
252 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
254 #undef TARGET_ASM_FUNCTION_EPILOGUE
255 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
257 #undef TARGET_ASM_OUTPUT_MI_THUNK
258 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
260 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
261 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
263 #undef TARGET_ASM_FILE_START
264 #define TARGET_ASM_FILE_START sh_file_start
265 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
266 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
268 #undef TARGET_INSERT_ATTRIBUTES
269 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
271 #undef TARGET_SCHED_ADJUST_COST
272 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
274 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
275 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
277 #undef TARGET_SCHED_ISSUE_RATE
278 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
280 #undef TARGET_CANNOT_MODIFY_JUMPS_P
281 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
282 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
283 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
284 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
285 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
286 sh_optimize_target_register_callee_saved
288 #undef TARGET_MS_BITFIELD_LAYOUT_P
289 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
291 #undef TARGET_INIT_BUILTINS
292 #define TARGET_INIT_BUILTINS sh_init_builtins
293 #undef TARGET_EXPAND_BUILTIN
294 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
297 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
299 #undef TARGET_CANNOT_COPY_INSN_P
300 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
301 #undef TARGET_RTX_COSTS
302 #define TARGET_RTX_COSTS sh_rtx_costs
303 #undef TARGET_ADDRESS_COST
304 #define TARGET_ADDRESS_COST sh_address_cost
306 #undef TARGET_MACHINE_DEPENDENT_REORG
307 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
310 #undef TARGET_HAVE_TLS
311 #define TARGET_HAVE_TLS true
314 struct gcc_target targetm = TARGET_INITIALIZER;
316 /* Print the operand address in x to the stream. */
319 print_operand_address (stream, x)
323 switch (GET_CODE (x))
327 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
332 rtx base = XEXP (x, 0);
333 rtx index = XEXP (x, 1);
335 switch (GET_CODE (index))
338 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
339 reg_names[true_regnum (base)]);
345 int base_num = true_regnum (base);
346 int index_num = true_regnum (index);
348 fprintf (stream, "@(r0,%s)",
349 reg_names[MAX (base_num, index_num)]);
361 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
365 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
369 x = mark_constant_pool_use (x);
370 output_addr_const (stream, x);
375 /* Print operand x (an rtx) in assembler syntax to file stream
376 according to modifier code.
378 '.' print a .s if insn needs delay slot
379 ',' print LOCAL_LABEL_PREFIX
380 '@' print trap, rte or rts depending upon pragma interruptness
381 '#' output a nop if there is nothing to put in the delay slot
382 ''' print likelyhood suffix (/u for unlikely).
383 'O' print a constant without the #
384 'R' print the LSW of a dp value - changes if in little endian
385 'S' print the MSW of a dp value - changes if in little endian
386 'T' print the next word of a dp value - same as 'R' in big endian mode.
387 'M' print an `x' if `m' will print `base,index'.
388 'N' print 'r63' if the operand is (const_int 0).
389 'm' print a pair `base,offset' or `base,index', for LD and ST.
390 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
391 'o' output an operator. */
394 print_operand (stream, x, code)
403 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
404 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
405 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
408 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
412 fprintf (stream, "trapa #%d", trap_exit);
413 else if (sh_cfun_interrupt_handler_p ())
414 fprintf (stream, "rte");
416 fprintf (stream, "rts");
419 /* Output a nop if there's nothing in the delay slot. */
420 if (dbr_sequence_length () == 0)
421 fprintf (stream, "\n\tnop");
425 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
427 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
428 fputs ("/u", stream);
432 x = mark_constant_pool_use (x);
433 output_addr_const (stream, x);
436 fputs (reg_names[REGNO (x) + LSW], (stream));
439 fputs (reg_names[REGNO (x) + MSW], (stream));
442 /* Next word of a double. */
443 switch (GET_CODE (x))
446 fputs (reg_names[REGNO (x) + 1], (stream));
449 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
450 && GET_CODE (XEXP (x, 0)) != POST_INC)
451 x = adjust_address (x, SImode, 4);
452 print_operand_address (stream, XEXP (x, 0));
459 switch (GET_CODE (x))
461 case PLUS: fputs ("add", stream); break;
462 case MINUS: fputs ("sub", stream); break;
463 case MULT: fputs ("mul", stream); break;
464 case DIV: fputs ("div", stream); break;
465 case EQ: fputs ("eq", stream); break;
466 case NE: fputs ("ne", stream); break;
467 case GT: case LT: fputs ("gt", stream); break;
468 case GE: case LE: fputs ("ge", stream); break;
469 case GTU: case LTU: fputs ("gtu", stream); break;
470 case GEU: case LEU: fputs ("geu", stream); break;
476 if (GET_CODE (x) == MEM
477 && GET_CODE (XEXP (x, 0)) == PLUS
478 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
479 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
484 if (GET_CODE (x) != MEM)
487 switch (GET_CODE (x))
491 print_operand (stream, x, 0);
492 fputs (", 0", stream);
496 print_operand (stream, XEXP (x, 0), 0);
497 fputs (", ", stream);
498 print_operand (stream, XEXP (x, 1), 0);
507 if (x == CONST0_RTX (GET_MODE (x)))
509 fprintf ((stream), "r63");
514 if (GET_CODE (x) == CONST_INT)
516 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
523 switch (GET_CODE (x))
525 /* FIXME: We need this on SHmedia32 because reload generates
526 some sign-extended HI or QI loads into DImode registers
527 but, because Pmode is SImode, the address ends up with a
528 subreg:SI of the DImode register. Maybe reload should be
529 fixed so as to apply alter_subreg to such loads? */
531 if (SUBREG_BYTE (x) != 0
532 || GET_CODE (SUBREG_REG (x)) != REG)
539 if (FP_REGISTER_P (REGNO (x))
540 && GET_MODE (x) == V16SFmode)
541 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
542 else if (FP_REGISTER_P (REGNO (x))
543 && GET_MODE (x) == V4SFmode)
544 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
545 else if (GET_CODE (x) == REG
546 && GET_MODE (x) == V2SFmode)
547 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
548 else if (FP_REGISTER_P (REGNO (x))
549 && GET_MODE_SIZE (GET_MODE (x)) > 4)
550 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
552 fputs (reg_names[REGNO (x)], (stream));
556 output_address (XEXP (x, 0));
561 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
562 && GET_MODE (XEXP (x, 0)) == DImode
563 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
564 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
566 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
569 if (GET_CODE (val) == ASHIFTRT)
572 if (GET_CODE (XEXP (val, 0)) == CONST)
574 output_addr_const (stream, XEXP (val, 0));
575 if (GET_CODE (XEXP (val, 0)) == CONST)
577 fputs (" >> ", stream);
578 output_addr_const (stream, XEXP (val, 1));
583 if (GET_CODE (val) == CONST)
585 output_addr_const (stream, val);
586 if (GET_CODE (val) == CONST)
589 fputs (" & 65535)", stream);
597 output_addr_const (stream, x);
604 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
606 force_into (value, target)
609 value = force_operand (value, target);
610 if (! rtx_equal_p (value, target))
611 emit_insn (gen_move_insn (target, value));
614 /* Emit code to perform a block move. Choose the best method.
616 OPERANDS[0] is the destination.
617 OPERANDS[1] is the source.
618 OPERANDS[2] is the size.
619 OPERANDS[3] is the alignment safe to use. */
622 expand_block_move (operands)
625 int align = INTVAL (operands[3]);
626 int constp = (GET_CODE (operands[2]) == CONST_INT);
627 int bytes = (constp ? INTVAL (operands[2]) : 0);
629 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
630 alignment, or if it isn't a multiple of 4 bytes, then fail. */
631 if (! constp || align < 4 || (bytes % 4 != 0))
638 else if (bytes == 12)
643 rtx r4 = gen_rtx (REG, SImode, 4);
644 rtx r5 = gen_rtx (REG, SImode, 5);
646 entry_name = get_identifier ("__movstrSI12_i4");
648 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
649 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
650 force_into (XEXP (operands[0], 0), r4);
651 force_into (XEXP (operands[1], 0), r5);
652 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
655 else if (! TARGET_SMALLCODE)
661 rtx r4 = gen_rtx (REG, SImode, 4);
662 rtx r5 = gen_rtx (REG, SImode, 5);
663 rtx r6 = gen_rtx (REG, SImode, 6);
665 entry_name = get_identifier (bytes & 4
667 : "__movstr_i4_even");
668 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
669 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
670 force_into (XEXP (operands[0], 0), r4);
671 force_into (XEXP (operands[1], 0), r5);
674 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
675 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
687 rtx r4 = gen_rtx_REG (SImode, 4);
688 rtx r5 = gen_rtx_REG (SImode, 5);
690 sprintf (entry, "__movstrSI%d", bytes);
691 entry_name = get_identifier (entry);
692 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
693 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
694 force_into (XEXP (operands[0], 0), r4);
695 force_into (XEXP (operands[1], 0), r5);
696 emit_insn (gen_block_move_real (func_addr_rtx));
700 /* This is the same number of bytes as a memcpy call, but to a different
701 less common function name, so this will occasionally use more space. */
702 if (! TARGET_SMALLCODE)
707 int final_switch, while_loop;
708 rtx r4 = gen_rtx_REG (SImode, 4);
709 rtx r5 = gen_rtx_REG (SImode, 5);
710 rtx r6 = gen_rtx_REG (SImode, 6);
712 entry_name = get_identifier ("__movstr");
713 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
714 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
715 force_into (XEXP (operands[0], 0), r4);
716 force_into (XEXP (operands[1], 0), r5);
718 /* r6 controls the size of the move. 16 is decremented from it
719 for each 64 bytes moved. Then the negative bit left over is used
720 as an index into a list of move instructions. e.g., a 72 byte move
721 would be set up with size(r6) = 14, for one iteration through the
722 big while loop, and a switch of -2 for the last part. */
724 final_switch = 16 - ((bytes / 4) % 16);
725 while_loop = ((bytes / 4) / 16 - 1) * 16;
726 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
727 emit_insn (gen_block_lump_real (func_addr_rtx));
734 /* Prepare operands for a move define_expand; specifically, one of the
735 operands must be in a register. */
738 prepare_move_operands (operands, mode)
740 enum machine_mode mode;
742 if ((mode == SImode || mode == DImode)
744 && ! ((mode == Pmode || mode == ptr_mode)
745 && tls_symbolic_operand (operands[1], Pmode) != 0))
748 if (SYMBOLIC_CONST_P (operands[1]))
750 if (GET_CODE (operands[0]) == MEM)
751 operands[1] = force_reg (Pmode, operands[1]);
752 else if (TARGET_SHMEDIA
753 && GET_CODE (operands[1]) == LABEL_REF
754 && target_reg_operand (operands[0], mode))
758 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
759 operands[1] = legitimize_pic_address (operands[1], mode, temp);
762 else if (GET_CODE (operands[1]) == CONST
763 && GET_CODE (XEXP (operands[1], 0)) == PLUS
764 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
766 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
767 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
769 operands[1] = expand_binop (mode, add_optab, temp,
770 XEXP (XEXP (operands[1], 0), 1),
771 no_new_pseudos ? temp
772 : gen_reg_rtx (Pmode),
777 if (! reload_in_progress && ! reload_completed)
779 /* Copy the source to a register if both operands aren't registers. */
780 if (! register_operand (operands[0], mode)
781 && ! sh_register_operand (operands[1], mode))
782 operands[1] = copy_to_mode_reg (mode, operands[1]);
784 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
786 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
787 except that we can't use that function because it is static. */
788 rtx new = change_address (operands[0], mode, 0);
789 MEM_COPY_ATTRIBUTES (new, operands[0]);
793 /* This case can happen while generating code to move the result
794 of a library call to the target. Reject `st r0,@(rX,rY)' because
795 reload will fail to find a spill register for rX, since r0 is already
796 being used for the source. */
797 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
798 && GET_CODE (operands[0]) == MEM
799 && GET_CODE (XEXP (operands[0], 0)) == PLUS
800 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
801 operands[1] = copy_to_mode_reg (mode, operands[1]);
804 if (mode == Pmode || mode == ptr_mode)
807 enum tls_model tls_kind;
811 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
813 rtx tga_op1, tga_ret, tmp, tmp2;
818 case TLS_MODEL_GLOBAL_DYNAMIC:
819 tga_ret = gen_rtx_REG (Pmode, R0_REG);
820 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
824 case TLS_MODEL_LOCAL_DYNAMIC:
825 tga_ret = gen_rtx_REG (Pmode, R0_REG);
826 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
828 tmp = gen_reg_rtx (Pmode);
829 emit_move_insn (tmp, tga_ret);
831 if (register_operand (op0, Pmode))
834 tmp2 = gen_reg_rtx (Pmode);
836 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
840 case TLS_MODEL_INITIAL_EXEC:
842 emit_insn (gen_GOTaddr2picreg ());
843 tga_op1 = gen_reg_rtx (Pmode);
844 tmp = gen_sym2GOTTPOFF (op1);
845 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
849 case TLS_MODEL_LOCAL_EXEC:
850 tmp2 = gen_reg_rtx (Pmode);
851 emit_insn (gen_load_gbr (tmp2));
852 tmp = gen_reg_rtx (Pmode);
853 emit_insn (gen_symTPOFF2reg (tmp, op1));
854 RTX_UNCHANGING_P (tmp) = 1;
856 if (register_operand (op0, Pmode))
859 op1 = gen_reg_rtx (Pmode);
861 emit_insn (gen_addsi3 (op1, tmp, tmp2));
874 /* Prepare the operands for an scc instruction; make sure that the
875 compare has been done. */
877 prepare_scc_operands (code)
880 rtx t_reg = gen_rtx_REG (SImode, T_REG);
881 enum rtx_code oldcode = code;
882 enum machine_mode mode;
884 /* First need a compare insn. */
888 /* It isn't possible to handle this case. */
907 rtx tmp = sh_compare_op0;
908 sh_compare_op0 = sh_compare_op1;
909 sh_compare_op1 = tmp;
912 mode = GET_MODE (sh_compare_op0);
913 if (mode == VOIDmode)
914 mode = GET_MODE (sh_compare_op1);
916 sh_compare_op0 = force_reg (mode, sh_compare_op0);
917 if ((code != EQ && code != NE
918 && (sh_compare_op1 != const0_rtx
919 || code == GTU || code == GEU || code == LTU || code == LEU))
920 || (mode == DImode && sh_compare_op1 != const0_rtx)
921 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
922 sh_compare_op1 = force_reg (mode, sh_compare_op1);
924 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
925 (mode == SFmode ? emit_sf_insn : emit_df_insn)
926 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
927 gen_rtx (SET, VOIDmode, t_reg,
928 gen_rtx (code, SImode,
929 sh_compare_op0, sh_compare_op1)),
930 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
932 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
933 gen_rtx (code, SImode, sh_compare_op0,
939 /* Called from the md file, set up the operands of a compare instruction. */
942 from_compare (operands, code)
946 enum machine_mode mode = GET_MODE (sh_compare_op0);
948 if (mode == VOIDmode)
949 mode = GET_MODE (sh_compare_op1);
952 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
954 /* Force args into regs, since we can't use constants here. */
955 sh_compare_op0 = force_reg (mode, sh_compare_op0);
956 if (sh_compare_op1 != const0_rtx
957 || code == GTU || code == GEU
958 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
959 sh_compare_op1 = force_reg (mode, sh_compare_op1);
961 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
963 from_compare (operands, GT);
964 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
967 insn = gen_rtx_SET (VOIDmode,
968 gen_rtx_REG (SImode, T_REG),
969 gen_rtx (code, SImode, sh_compare_op0,
971 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
973 insn = gen_rtx (PARALLEL, VOIDmode,
975 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
976 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
982 /* Functions to output assembly code. */
984 /* Return a sequence of instructions to perform DI or DF move.
986 Since the SH cannot move a DI or DF in one instruction, we have
987 to take care when we see overlapping source and dest registers. */
990 output_movedouble (insn, operands, mode)
991 rtx insn ATTRIBUTE_UNUSED;
993 enum machine_mode mode;
995 rtx dst = operands[0];
996 rtx src = operands[1];
998 if (GET_CODE (dst) == MEM
999 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1000 return "mov.l %T1,%0\n\tmov.l %1,%0";
1002 if (register_operand (dst, mode)
1003 && register_operand (src, mode))
1005 if (REGNO (src) == MACH_REG)
1006 return "sts mach,%S0\n\tsts macl,%R0";
1008 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1009 when mov.d r1,r0 do r1->r0 then r2->r1. */
1011 if (REGNO (src) + 1 == REGNO (dst))
1012 return "mov %T1,%T0\n\tmov %1,%0";
1014 return "mov %1,%0\n\tmov %T1,%T0";
1016 else if (GET_CODE (src) == CONST_INT)
1018 if (INTVAL (src) < 0)
1019 output_asm_insn ("mov #-1,%S0", operands);
1021 output_asm_insn ("mov #0,%S0", operands);
1023 return "mov %1,%R0";
1025 else if (GET_CODE (src) == MEM)
1028 int dreg = REGNO (dst);
1029 rtx inside = XEXP (src, 0);
1031 if (GET_CODE (inside) == REG)
1032 ptrreg = REGNO (inside);
1033 else if (GET_CODE (inside) == SUBREG)
1034 ptrreg = subreg_regno (inside);
1035 else if (GET_CODE (inside) == PLUS)
1037 ptrreg = REGNO (XEXP (inside, 0));
1038 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1039 an offsettable address. Unfortunately, offsettable addresses use
1040 QImode to check the offset, and a QImode offsettable address
1041 requires r0 for the other operand, which is not currently
1042 supported, so we can't use the 'o' constraint.
1043 Thus we must check for and handle r0+REG addresses here.
1044 We punt for now, since this is likely very rare. */
1045 if (GET_CODE (XEXP (inside, 1)) == REG)
1048 else if (GET_CODE (inside) == LABEL_REF)
1049 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1050 else if (GET_CODE (inside) == POST_INC)
1051 return "mov.l %1,%0\n\tmov.l %1,%T0";
1055 /* Work out the safe way to copy. Copy into the second half first. */
1057 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1060 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1063 /* Print an instruction which would have gone into a delay slot after
1064 another instruction, but couldn't because the other instruction expanded
1065 into a sequence where putting the slot insn at the end wouldn't work. */
1071 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
1073 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1077 output_far_jump (insn, op)
1081 struct { rtx lab, reg, op; } this;
1082 rtx braf_base_lab = NULL_RTX;
1085 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1088 this.lab = gen_label_rtx ();
1092 && offset - get_attr_length (insn) <= 32766)
1095 jump = "mov.w %O0,%1; braf %1";
1103 jump = "mov.l %O0,%1; braf %1";
1105 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1108 jump = "mov.l %O0,%1; jmp @%1";
1110 /* If we have a scratch register available, use it. */
1111 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1112 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1114 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1115 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1116 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1117 output_asm_insn (jump, &this.lab);
1118 if (dbr_sequence_length ())
1119 print_slot (final_sequence);
1121 output_asm_insn ("nop", 0);
1125 /* Output the delay slot insn first if any. */
1126 if (dbr_sequence_length ())
1127 print_slot (final_sequence);
1129 this.reg = gen_rtx_REG (SImode, 13);
1130 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1131 Fortunately, MACL is fixed and call-clobbered, and we never
1132 need its value across jumps, so save r13 in it instead of in
1135 output_asm_insn ("lds r13, macl", 0);
1137 output_asm_insn ("mov.l r13,@-r15", 0);
1138 output_asm_insn (jump, &this.lab);
1140 output_asm_insn ("sts macl, r13", 0);
1142 output_asm_insn ("mov.l @r15+,r13", 0);
1144 if (far && flag_pic && TARGET_SH2)
1146 braf_base_lab = gen_label_rtx ();
1147 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1148 CODE_LABEL_NUMBER (braf_base_lab));
1151 output_asm_insn (".align 2", 0);
1152 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1154 if (far && flag_pic)
1157 this.lab = braf_base_lab;
1158 output_asm_insn (".long %O2-%O0", &this.lab);
1161 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1165 /* Local label counter, used for constants in the pool and inside
1166 pattern branches. */
1168 static int lf = 100;
1170 /* Output code for ordinary branches. */
1173 output_branch (logic, insn, operands)
1178 switch (get_attr_length (insn))
1181 /* This can happen if filling the delay slot has caused a forward
1182 branch to exceed its range (we could reverse it, but only
1183 when we know we won't overextend other branches; this should
1184 best be handled by relaxation).
1185 It can also happen when other condbranches hoist delay slot insn
1186 from their destination, thus leading to code size increase.
1187 But the branch will still be in the range -4092..+4098 bytes. */
1192 /* The call to print_slot will clobber the operands. */
1193 rtx op0 = operands[0];
1195 /* If the instruction in the delay slot is annulled (true), then
1196 there is no delay slot where we can put it now. The only safe
1197 place for it is after the label. final will do that by default. */
1200 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1202 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1203 ASSEMBLER_DIALECT ? "/" : ".", label);
1204 print_slot (final_sequence);
1207 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1209 output_asm_insn ("bra\t%l0", &op0);
1210 fprintf (asm_out_file, "\tnop\n");
1211 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1215 /* When relaxing, handle this like a short branch. The linker
1216 will fix it up if it still doesn't fit after relaxation. */
1218 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1220 /* These are for SH2e, in which we have to account for the
1221 extra nop because of the hardware bug in annulled branches. */
1228 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1230 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1232 ASSEMBLER_DIALECT ? "/" : ".", label);
1233 fprintf (asm_out_file, "\tnop\n");
1234 output_asm_insn ("bra\t%l0", operands);
1235 fprintf (asm_out_file, "\tnop\n");
1236 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1240 /* When relaxing, fall through. */
1245 sprintf (buffer, "b%s%ss\t%%l0",
1247 ASSEMBLER_DIALECT ? "/" : ".");
1248 output_asm_insn (buffer, &operands[0]);
1253 /* There should be no longer branches now - that would
1254 indicate that something has destroyed the branches set
1255 up in machine_dependent_reorg. */
1261 output_branchy_insn (code, template, insn, operands)
1263 const char *template;
1267 rtx next_insn = NEXT_INSN (insn);
1269 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1271 rtx src = SET_SRC (PATTERN (next_insn));
1272 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1274 /* Following branch not taken */
1275 operands[9] = gen_label_rtx ();
1276 emit_label_after (operands[9], next_insn);
1277 INSN_ADDRESSES_NEW (operands[9],
1278 INSN_ADDRESSES (INSN_UID (next_insn))
1279 + get_attr_length (next_insn));
1284 int offset = (branch_dest (next_insn)
1285 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1286 if (offset >= -252 && offset <= 258)
1288 if (GET_CODE (src) == IF_THEN_ELSE)
1290 src = XEXP (src, 1);
1296 operands[9] = gen_label_rtx ();
1297 emit_label_after (operands[9], insn);
1298 INSN_ADDRESSES_NEW (operands[9],
1299 INSN_ADDRESSES (INSN_UID (insn))
1300 + get_attr_length (insn));
1305 output_ieee_ccmpeq (insn, operands)
1306 rtx insn, *operands;
1308 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1311 /* Output the start of the assembler file. */
1316 default_file_start ();
1319 /* We need to show the text section with the proper
1320 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1321 emits it without attributes in TEXT_SECTION, else GAS
1322 will complain. We can teach GAS specifically about the
1323 default attributes for our choice of text section, but
1324 then we would have to change GAS again if/when we change
1325 the text section name. */
1326 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1328 /* Switch to the data section so that the coffsem symbol
1329 isn't in the text section. */
1332 if (TARGET_LITTLE_ENDIAN)
1333 fputs ("\t.little\n", asm_out_file);
1337 if (TARGET_SHCOMPACT)
1338 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1339 else if (TARGET_SHMEDIA)
1340 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1341 TARGET_SHMEDIA64 ? 64 : 32);
1345 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1348 unspec_caller_rtx_p (pat)
1351 switch (GET_CODE (pat))
1354 return unspec_caller_rtx_p (XEXP (pat, 0));
1357 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1359 return unspec_caller_rtx_p (XEXP (pat, 1));
1361 if (XINT (pat, 1) == UNSPEC_CALLER)
1370 /* Indicate that INSN cannot be duplicated. This is true for insn
1371 that generates an unique label. */
1374 sh_cannot_copy_insn_p (insn)
1379 if (!reload_completed || !flag_pic)
1382 if (GET_CODE (insn) != INSN)
1384 if (asm_noperands (insn) >= 0)
1387 pat = PATTERN (insn);
1388 if (GET_CODE (pat) != SET)
1390 pat = SET_SRC (pat);
1392 if (unspec_caller_rtx_p (pat))
1398 /* Actual number of instructions used to make a shift by N. */
1399 static const char ashiftrt_insns[] =
1400 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1402 /* Left shift and logical right shift are the same. */
1403 static const char shift_insns[] =
1404 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1406 /* Individual shift amounts needed to get the above length sequences.
1407 One bit right shifts clobber the T bit, so when possible, put one bit
1408 shifts in the middle of the sequence, so the ends are eligible for
1409 branch delay slots. */
1410 static const short shift_amounts[32][5] = {
1411 {0}, {1}, {2}, {2, 1},
1412 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1413 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1414 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1415 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1416 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1417 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1418 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1420 /* Likewise, but for shift amounts < 16, up to three highmost bits
1421 might be clobbered. This is typically used when combined with some
1422 kind of sign or zero extension. */
1424 static const char ext_shift_insns[] =
1425 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1427 static const short ext_shift_amounts[32][4] = {
1428 {0}, {1}, {2}, {2, 1},
1429 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1430 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1431 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1432 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1433 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1434 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1435 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1437 /* Assuming we have a value that has been sign-extended by at least one bit,
1438 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1439 to shift it by N without data loss, and quicker than by other means? */
1440 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1442 /* This is used in length attributes in sh.md to help compute the length
1443 of arbitrary constant shift instructions. */
1446 shift_insns_rtx (insn)
1449 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1450 int shift_count = INTVAL (XEXP (set_src, 1));
1451 enum rtx_code shift_code = GET_CODE (set_src);
1456 return ashiftrt_insns[shift_count];
1459 return shift_insns[shift_count];
1465 /* Return the cost of a shift. */
1476 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1478 if (GET_MODE (x) == DImode
1479 && GET_CODE (XEXP (x, 1)) == CONST_INT
1480 && INTVAL (XEXP (x, 1)) == 1)
1483 /* Everything else is invalid, because there is no pattern for it. */
1486 /* If shift by a non constant, then this will be expensive. */
1487 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1488 return SH_DYNAMIC_SHIFT_COST;
1490 value = INTVAL (XEXP (x, 1));
1492 /* Otherwise, return the true cost in instructions. */
1493 if (GET_CODE (x) == ASHIFTRT)
1495 int cost = ashiftrt_insns[value];
1496 /* If SH3, then we put the constant in a reg and use shad. */
1497 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1498 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1502 return shift_insns[value];
1505 /* Return the cost of an AND operation. */
1513 /* Anding with a register is a single cycle and instruction. */
1514 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1517 i = INTVAL (XEXP (x, 1));
1521 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1522 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1523 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1529 /* These constants are single cycle extu.[bw] instructions. */
1530 if (i == 0xff || i == 0xffff)
1532 /* Constants that can be used in an and immediate instruction in a single
1533 cycle, but this requires r0, so make it a little more expensive. */
1534 if (CONST_OK_FOR_K08 (i))
1536 /* Constants that can be loaded with a mov immediate and an and.
1537 This case is probably unnecessary. */
1538 if (CONST_OK_FOR_I08 (i))
1540 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1541 This case is probably unnecessary. */
1545 /* Return the cost of an addition or a subtraction. */
1551 /* Adding a register is a single cycle insn. */
1552 if (GET_CODE (XEXP (x, 1)) == REG
1553 || GET_CODE (XEXP (x, 1)) == SUBREG)
1556 /* Likewise for small constants. */
1557 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1558 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1562 switch (GET_CODE (XEXP (x, 1)))
1567 return TARGET_SHMEDIA64 ? 5 : 3;
1570 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1572 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1574 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1582 /* Any other constant requires a 2 cycle pc-relative load plus an
1587 /* Return the cost of a multiply. */
1590 rtx x ATTRIBUTE_UNUSED;
1597 /* We have a mul insn, so we can never take more than the mul and the
1598 read of the mac reg, but count more because of the latency and extra
1600 if (TARGET_SMALLCODE)
1605 /* If we're aiming at small code, then just count the number of
1606 insns in a multiply call sequence. */
1607 if (TARGET_SMALLCODE)
1610 /* Otherwise count all the insns in the routine we'd be calling too. */
1614 /* Compute a (partial) cost for rtx X. Return true if the complete
1615 cost has been computed, and false if subexpressions should be
1616 scanned. In either case, *TOTAL contains the cost result. */
1619 sh_rtx_costs (x, code, outer_code, total)
1621 int code, outer_code, *total;
1628 if (INTVAL (x) == 0)
1630 else if (outer_code == AND && and_operand ((x), DImode))
1632 else if ((outer_code == IOR || outer_code == XOR
1633 || outer_code == PLUS)
1634 && CONST_OK_FOR_I10 (INTVAL (x)))
1636 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1637 *total = COSTS_N_INSNS (outer_code != SET);
1638 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1639 *total = COSTS_N_INSNS (2);
1640 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1641 *total = COSTS_N_INSNS (3);
1643 *total = COSTS_N_INSNS (4);
1646 if (CONST_OK_FOR_I08 (INTVAL (x)))
1648 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1649 && CONST_OK_FOR_K08 (INTVAL (x)))
1658 if (TARGET_SHMEDIA64)
1659 *total = COSTS_N_INSNS (4);
1660 else if (TARGET_SHMEDIA32)
1661 *total = COSTS_N_INSNS (2);
1668 *total = COSTS_N_INSNS (4);
1674 *total = COSTS_N_INSNS (addsubcosts (x));
1678 *total = COSTS_N_INSNS (andcosts (x));
1682 *total = COSTS_N_INSNS (multcosts (x));
1688 *total = COSTS_N_INSNS (shiftcosts (x));
1695 *total = COSTS_N_INSNS (20);
1708 /* Compute the cost of an address. For the SH, all valid addresses are
1709 the same cost. Use a slightly higher cost for reg + reg addressing,
1710 since it increases pressure on r0. */
1716 return (GET_CODE (X) == PLUS
1717 && ! CONSTANT_P (XEXP (X, 1))
1718 && ! TARGET_SHMEDIA ? 1 : 0);
1721 /* Code to expand a shift. */
1724 gen_ashift (type, n, reg)
1729 /* Negative values here come from the shift_amounts array. */
1742 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1746 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1748 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1751 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1756 /* Same for HImode */
1759 gen_ashift_hi (type, n, reg)
1764 /* Negative values here come from the shift_amounts array. */
1778 /* We don't have HImode right shift operations because using the
1779 ordinary 32 bit shift instructions for that doesn't generate proper
1780 zero/sign extension.
1781 gen_ashift_hi is only called in contexts where we know that the
1782 sign extension works out correctly. */
1785 if (GET_CODE (reg) == SUBREG)
1787 offset = SUBREG_BYTE (reg);
1788 reg = SUBREG_REG (reg);
1790 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1794 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1799 /* Output RTL to split a constant shift into its component SH constant
1800 shift instructions. */
1803 gen_shifty_op (code, operands)
1807 int value = INTVAL (operands[2]);
1810 /* Truncate the shift count in case it is out of bounds. */
1811 value = value & 0x1f;
1815 if (code == LSHIFTRT)
1817 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1818 emit_insn (gen_movt (operands[0]));
1821 else if (code == ASHIFT)
1823 /* There is a two instruction sequence for 31 bit left shifts,
1824 but it requires r0. */
1825 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1827 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1828 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1833 else if (value == 0)
1835 /* This can happen when not optimizing. We must output something here
1836 to prevent the compiler from aborting in final.c after the try_split
1838 emit_insn (gen_nop ());
1842 max = shift_insns[value];
1843 for (i = 0; i < max; i++)
1844 gen_ashift (code, shift_amounts[value][i], operands[0]);
1847 /* Same as above, but optimized for values where the topmost bits don't
1851 gen_shifty_hi_op (code, operands)
1855 int value = INTVAL (operands[2]);
1857 void (*gen_fun) PARAMS ((int, int, rtx));
1859 /* This operation is used by and_shl for SImode values with a few
1860 high bits known to be cleared. */
1864 emit_insn (gen_nop ());
1868 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1871 max = ext_shift_insns[value];
1872 for (i = 0; i < max; i++)
1873 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1876 /* When shifting right, emit the shifts in reverse order, so that
1877 solitary negative values come first. */
1878 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1879 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1882 /* Output RTL for an arithmetic right shift. */
1884 /* ??? Rewrite to use super-optimizer sequences. */
1887 expand_ashiftrt (operands)
1898 if (GET_CODE (operands[2]) != CONST_INT)
1900 rtx count = copy_to_mode_reg (SImode, operands[2]);
1901 emit_insn (gen_negsi2 (count, count));
1902 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1905 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1906 > 1 + SH_DYNAMIC_SHIFT_COST)
1909 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1910 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1914 if (GET_CODE (operands[2]) != CONST_INT)
1917 value = INTVAL (operands[2]) & 31;
1921 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1924 else if (value >= 16 && value <= 19)
1926 wrk = gen_reg_rtx (SImode);
1927 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1930 gen_ashift (ASHIFTRT, 1, wrk);
1931 emit_move_insn (operands[0], wrk);
1934 /* Expand a short sequence inline, longer call a magic routine. */
1935 else if (value <= 5)
1937 wrk = gen_reg_rtx (SImode);
1938 emit_move_insn (wrk, operands[1]);
1940 gen_ashift (ASHIFTRT, 1, wrk);
1941 emit_move_insn (operands[0], wrk);
1945 wrk = gen_reg_rtx (Pmode);
1947 /* Load the value into an arg reg and call a helper. */
1948 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1949 sprintf (func, "__ashiftrt_r4_%d", value);
1950 func_name = get_identifier (func);
1951 sym = function_symbol (IDENTIFIER_POINTER (func_name));
1952 emit_move_insn (wrk, sym);
1953 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1954 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1959 sh_dynamicalize_shift_p (count)
1962 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1965 /* Try to find a good way to implement the combiner pattern
1966 [(set (match_operand:SI 0 "register_operand" "r")
1967 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1968 (match_operand:SI 2 "const_int_operand" "n"))
1969 (match_operand:SI 3 "const_int_operand" "n"))) .
1970 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1971 return 0 for simple right / left or left/right shift combination.
1972 return 1 for a combination of shifts with zero_extend.
1973 return 2 for a combination of shifts with an AND that needs r0.
1974 return 3 for a combination of shifts with an AND that needs an extra
1975 scratch register, when the three highmost bits of the AND mask are clear.
1976 return 4 for a combination of shifts with an AND that needs an extra
1977 scratch register, when any of the three highmost bits of the AND mask
1979 If ATTRP is set, store an initial right shift width in ATTRP[0],
1980 and the instruction length in ATTRP[1] . These values are not valid
1982 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1983 shift_amounts for the last shift value that is to be used before the
1986 shl_and_kind (left_rtx, mask_rtx, attrp)
1987 rtx left_rtx, mask_rtx;
1990 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1991 int left = INTVAL (left_rtx), right;
1993 int cost, best_cost = 10000;
1994 int best_right = 0, best_len = 0;
1998 if (left < 0 || left > 31)
2000 if (GET_CODE (mask_rtx) == CONST_INT)
2001 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2003 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2004 /* Can this be expressed as a right shift / left shift pair ? */
2005 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2006 right = exact_log2 (lsb);
2007 mask2 = ~(mask + lsb - 1);
2008 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2009 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2011 best_cost = shift_insns[right] + shift_insns[right + left];
2012 /* mask has no trailing zeroes <==> ! right */
2013 else if (! right && mask2 == ~(lsb2 - 1))
2015 int late_right = exact_log2 (lsb2);
2016 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2018 /* Try to use zero extend */
2019 if (mask2 == ~(lsb2 - 1))
2023 for (width = 8; width <= 16; width += 8)
2025 /* Can we zero-extend right away? */
2026 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
2029 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2030 if (cost < best_cost)
2041 /* ??? Could try to put zero extend into initial right shift,
2042 or even shift a bit left before the right shift. */
2043 /* Determine value of first part of left shift, to get to the
2044 zero extend cut-off point. */
2045 first = width - exact_log2 (lsb2) + right;
2046 if (first >= 0 && right + left - first >= 0)
2048 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2049 + ext_shift_insns[right + left - first];
2050 if (cost < best_cost)
2062 /* Try to use r0 AND pattern */
2063 for (i = 0; i <= 2; i++)
2067 if (! CONST_OK_FOR_K08 (mask >> i))
2069 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2070 if (cost < best_cost)
2075 best_len = cost - 1;
2078 /* Try to use a scratch register to hold the AND operand. */
2079 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2080 for (i = 0; i <= 2; i++)
2084 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2085 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2086 if (cost < best_cost)
2091 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2097 attrp[0] = best_right;
2098 attrp[1] = best_len;
2103 /* This is used in length attributes of the unnamed instructions
2104 corresponding to shl_and_kind return values of 1 and 2. */
2106 shl_and_length (insn)
2109 rtx set_src, left_rtx, mask_rtx;
2112 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2113 left_rtx = XEXP (XEXP (set_src, 0), 1);
2114 mask_rtx = XEXP (set_src, 1);
2115 shl_and_kind (left_rtx, mask_rtx, attributes);
2116 return attributes[1];
2119 /* This is used in length attribute of the and_shl_scratch instruction. */
2122 shl_and_scr_length (insn)
2125 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2126 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2127 rtx op = XEXP (set_src, 0);
2128 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2129 op = XEXP (XEXP (op, 0), 0);
2130 return len + shift_insns[INTVAL (XEXP (op, 1))];
2133 /* Generating rtl? */
2134 extern int rtx_equal_function_value_matters;
2136 /* Generate rtl for instructions for which shl_and_kind advised a particular
2137 method of generating them, i.e. returned zero. */
2140 gen_shl_and (dest, left_rtx, mask_rtx, source)
2141 rtx dest, left_rtx, mask_rtx, source;
2144 unsigned HOST_WIDE_INT mask;
2145 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2146 int right, total_shift;
2147 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2149 right = attributes[0];
2150 total_shift = INTVAL (left_rtx) + right;
2151 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2158 int first = attributes[2];
2163 emit_insn ((mask << right) <= 0xff
2164 ? gen_zero_extendqisi2(dest,
2165 gen_lowpart (QImode, source))
2166 : gen_zero_extendhisi2(dest,
2167 gen_lowpart (HImode, source)));
2171 emit_insn (gen_movsi (dest, source));
2175 operands[2] = GEN_INT (right);
2176 gen_shifty_hi_op (LSHIFTRT, operands);
2180 operands[2] = GEN_INT (first);
2181 gen_shifty_hi_op (ASHIFT, operands);
2182 total_shift -= first;
2186 emit_insn (mask <= 0xff
2187 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2188 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2189 if (total_shift > 0)
2191 operands[2] = GEN_INT (total_shift);
2192 gen_shifty_hi_op (ASHIFT, operands);
2197 shift_gen_fun = gen_shifty_op;
2199 /* If the topmost bit that matters is set, set the topmost bits
2200 that don't matter. This way, we might be able to get a shorter
2202 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2203 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2205 /* Don't expand fine-grained when combining, because that will
2206 make the pattern fail. */
2207 if (rtx_equal_function_value_matters
2208 || reload_in_progress || reload_completed)
2212 /* Cases 3 and 4 should be handled by this split
2213 only while combining */
2218 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2221 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2226 operands[2] = GEN_INT (total_shift);
2227 shift_gen_fun (ASHIFT, operands);
2234 if (kind != 4 && total_shift < 16)
2236 neg = -ext_shift_amounts[total_shift][1];
2238 neg -= ext_shift_amounts[total_shift][2];
2242 emit_insn (gen_and_shl_scratch (dest, source,
2245 GEN_INT (total_shift + neg),
2247 emit_insn (gen_movsi (dest, dest));
2254 /* Try to find a good way to implement the combiner pattern
2255 [(set (match_operand:SI 0 "register_operand" "=r")
2256 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2257 (match_operand:SI 2 "const_int_operand" "n")
2258 (match_operand:SI 3 "const_int_operand" "n")
2260 (clobber (reg:SI T_REG))]
2261 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2262 return 0 for simple left / right shift combination.
2263 return 1 for left shift / 8 bit sign extend / left shift.
2264 return 2 for left shift / 16 bit sign extend / left shift.
2265 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2266 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2267 return 5 for left shift / 16 bit sign extend / right shift
2268 return 6 for < 8 bit sign extend / left shift.
2269 return 7 for < 8 bit sign extend / left shift / single right shift.
2270 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2273 shl_sext_kind (left_rtx, size_rtx, costp)
2274 rtx left_rtx, size_rtx;
2277 int left, size, insize, ext;
2278 int cost = 0, best_cost;
2281 left = INTVAL (left_rtx);
2282 size = INTVAL (size_rtx);
2283 insize = size - left;
2286 /* Default to left / right shift. */
2288 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2291 /* 16 bit shift / sign extend / 16 bit shift */
2292 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2293 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2294 below, by alternative 3 or something even better. */
2295 if (cost < best_cost)
2301 /* Try a plain sign extend between two shifts. */
2302 for (ext = 16; ext >= insize; ext -= 8)
2306 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2307 if (cost < best_cost)
2309 kind = ext / (unsigned) 8;
2313 /* Check if we can do a sloppy shift with a final signed shift
2314 restoring the sign. */
2315 if (EXT_SHIFT_SIGNED (size - ext))
2316 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2317 /* If not, maybe it's still cheaper to do the second shift sloppy,
2318 and do a final sign extend? */
2319 else if (size <= 16)
2320 cost = ext_shift_insns[ext - insize] + 1
2321 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2324 if (cost < best_cost)
2326 kind = ext / (unsigned) 8 + 2;
2330 /* Check if we can sign extend in r0 */
2333 cost = 3 + shift_insns[left];
2334 if (cost < best_cost)
2339 /* Try the same with a final signed shift. */
2342 cost = 3 + ext_shift_insns[left + 1] + 1;
2343 if (cost < best_cost)
2352 /* Try to use a dynamic shift. */
2353 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2354 if (cost < best_cost)
2365 /* Function to be used in the length attribute of the instructions
2366 implementing this pattern. */
2369 shl_sext_length (insn)
2372 rtx set_src, left_rtx, size_rtx;
2375 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2376 left_rtx = XEXP (XEXP (set_src, 0), 1);
2377 size_rtx = XEXP (set_src, 1);
2378 shl_sext_kind (left_rtx, size_rtx, &cost);
2382 /* Generate rtl for this pattern */
2385 gen_shl_sext (dest, left_rtx, size_rtx, source)
2386 rtx dest, left_rtx, size_rtx, source;
2389 int left, size, insize, cost;
2392 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2393 left = INTVAL (left_rtx);
2394 size = INTVAL (size_rtx);
2395 insize = size - left;
2403 int ext = kind & 1 ? 8 : 16;
2404 int shift2 = size - ext;
2406 /* Don't expand fine-grained when combining, because that will
2407 make the pattern fail. */
2408 if (! rtx_equal_function_value_matters
2409 && ! reload_in_progress && ! reload_completed)
2411 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2412 emit_insn (gen_movsi (dest, source));
2416 emit_insn (gen_movsi (dest, source));
2420 operands[2] = GEN_INT (ext - insize);
2421 gen_shifty_hi_op (ASHIFT, operands);
2424 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2425 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2430 operands[2] = GEN_INT (shift2);
2431 gen_shifty_op (ASHIFT, operands);
2438 if (EXT_SHIFT_SIGNED (shift2))
2440 operands[2] = GEN_INT (shift2 + 1);
2441 gen_shifty_op (ASHIFT, operands);
2442 operands[2] = GEN_INT (1);
2443 gen_shifty_op (ASHIFTRT, operands);
2446 operands[2] = GEN_INT (shift2);
2447 gen_shifty_hi_op (ASHIFT, operands);
2451 operands[2] = GEN_INT (-shift2);
2452 gen_shifty_hi_op (LSHIFTRT, operands);
2454 emit_insn (size <= 8
2455 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2456 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2463 if (! rtx_equal_function_value_matters
2464 && ! reload_in_progress && ! reload_completed)
2465 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2469 operands[2] = GEN_INT (16 - insize);
2470 gen_shifty_hi_op (ASHIFT, operands);
2471 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2473 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2475 gen_ashift (ASHIFTRT, 1, dest);
2480 /* Don't expand fine-grained when combining, because that will
2481 make the pattern fail. */
2482 if (! rtx_equal_function_value_matters
2483 && ! reload_in_progress && ! reload_completed)
2485 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2486 emit_insn (gen_movsi (dest, source));
2489 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2490 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2491 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2493 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2494 gen_shifty_op (ASHIFT, operands);
2496 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2504 /* Prefix a symbol_ref name with "datalabel". */
2507 gen_datalabel_ref (sym)
2510 if (GET_CODE (sym) == LABEL_REF)
2511 return gen_rtx_CONST (GET_MODE (sym),
2512 gen_rtx_UNSPEC (GET_MODE (sym),
2516 if (GET_CODE (sym) != SYMBOL_REF)
2523 /* The SH cannot load a large constant into a register, constants have to
2524 come from a pc relative load. The reference of a pc relative load
2525 instruction must be less than 1k infront of the instruction. This
2526 means that we often have to dump a constant inside a function, and
2527 generate code to branch around it.
2529 It is important to minimize this, since the branches will slow things
2530 down and make things bigger.
2532 Worst case code looks like:
2550 We fix this by performing a scan before scheduling, which notices which
2551 instructions need to have their operands fetched from the constant table
2552 and builds the table.
2556 scan, find an instruction which needs a pcrel move. Look forward, find the
2557 last barrier which is within MAX_COUNT bytes of the requirement.
2558 If there isn't one, make one. Process all the instructions between
2559 the find and the barrier.
2561 In the above example, we can tell that L3 is within 1k of L1, so
2562 the first move can be shrunk from the 3 insn+constant sequence into
2563 just 1 insn, and the constant moved to L3 to make:
2574 Then the second move becomes the target for the shortening process. */
2578 rtx value; /* Value in table. */
2579 rtx label; /* Label of value. */
2580 rtx wend; /* End of window. */
2581 enum machine_mode mode; /* Mode of value. */
2583 /* True if this constant is accessed as part of a post-increment
2584 sequence. Note that HImode constants are never accessed in this way. */
2585 bool part_of_sequence_p;
2588 /* The maximum number of constants that can fit into one pool, since
2589 the pc relative range is 0...1020 bytes and constants are at least 4
2592 #define MAX_POOL_SIZE (1020/4)
2593 static pool_node pool_vector[MAX_POOL_SIZE];
2594 static int pool_size;
2595 static rtx pool_window_label;
2596 static int pool_window_last;
2598 /* ??? If we need a constant in HImode which is the truncated value of a
2599 constant we need in SImode, we could combine the two entries thus saving
2600 two bytes. Is this common enough to be worth the effort of implementing
2603 /* ??? This stuff should be done at the same time that we shorten branches.
2604 As it is now, we must assume that all branches are the maximum size, and
2605 this causes us to almost always output constant pools sooner than
2608 /* Add a constant to the pool and return its label. */
2611 add_constant (x, mode, last_value)
2613 enum machine_mode mode;
2617 rtx lab, new, ref, newref;
2619 /* First see if we've already got it. */
2620 for (i = 0; i < pool_size; i++)
2622 if (x->code == pool_vector[i].value->code
2623 && mode == pool_vector[i].mode)
2625 if (x->code == CODE_LABEL)
2627 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2630 if (rtx_equal_p (x, pool_vector[i].value))
2635 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2637 new = gen_label_rtx ();
2638 LABEL_REFS (new) = pool_vector[i].label;
2639 pool_vector[i].label = lab = new;
2641 if (lab && pool_window_label)
2643 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2644 ref = pool_vector[pool_window_last].wend;
2645 LABEL_NEXTREF (newref) = ref;
2646 pool_vector[pool_window_last].wend = newref;
2649 pool_window_label = new;
2650 pool_window_last = i;
2656 /* Need a new one. */
2657 pool_vector[pool_size].value = x;
2658 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2661 pool_vector[pool_size - 1].part_of_sequence_p = true;
2664 lab = gen_label_rtx ();
2665 pool_vector[pool_size].mode = mode;
2666 pool_vector[pool_size].label = lab;
2667 pool_vector[pool_size].wend = NULL_RTX;
2668 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2669 if (lab && pool_window_label)
2671 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2672 ref = pool_vector[pool_window_last].wend;
2673 LABEL_NEXTREF (newref) = ref;
2674 pool_vector[pool_window_last].wend = newref;
2677 pool_window_label = lab;
2678 pool_window_last = pool_size;
2683 /* Output the literal table. */
2694 /* Do two passes, first time dump out the HI sized constants. */
2696 for (i = 0; i < pool_size; i++)
2698 pool_node *p = &pool_vector[i];
2700 if (p->mode == HImode)
2704 scan = emit_insn_after (gen_align_2 (), scan);
2707 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2708 scan = emit_label_after (lab, scan);
2709 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2711 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2713 lab = XEXP (ref, 0);
2714 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2717 else if (p->mode == DFmode)
2723 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2725 rtx align_insn = NULL_RTX;
2727 scan = emit_label_after (gen_label_rtx (), scan);
2728 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2731 for (i = 0; i < pool_size; i++)
2733 pool_node *p = &pool_vector[i];
2741 if (align_insn && !p->part_of_sequence_p)
2743 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2744 emit_label_before (lab, align_insn);
2745 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2747 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2749 lab = XEXP (ref, 0);
2750 emit_insn_before (gen_consttable_window_end (lab),
2753 delete_insn (align_insn);
2754 align_insn = NULL_RTX;
2759 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2760 scan = emit_label_after (lab, scan);
2761 scan = emit_insn_after (gen_consttable_4 (p->value,
2763 need_align = ! need_align;
2769 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2774 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2775 scan = emit_label_after (lab, scan);
2776 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2784 if (p->mode != HImode)
2786 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2788 lab = XEXP (ref, 0);
2789 scan = emit_insn_after (gen_consttable_window_end (lab),
2798 for (i = 0; i < pool_size; i++)
2800 pool_node *p = &pool_vector[i];
2811 scan = emit_label_after (gen_label_rtx (), scan);
2812 scan = emit_insn_after (gen_align_4 (), scan);
2814 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2815 scan = emit_label_after (lab, scan);
2816 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2824 scan = emit_label_after (gen_label_rtx (), scan);
2825 scan = emit_insn_after (gen_align_4 (), scan);
2827 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2828 scan = emit_label_after (lab, scan);
2829 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2837 if (p->mode != HImode)
2839 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2841 lab = XEXP (ref, 0);
2842 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2847 scan = emit_insn_after (gen_consttable_end (), scan);
2848 scan = emit_barrier_after (scan);
2850 pool_window_label = NULL_RTX;
2851 pool_window_last = 0;
2854 /* Return nonzero if constant would be an ok source for a
2855 mov.w instead of a mov.l. */
2861 return (GET_CODE (src) == CONST_INT
2862 && INTVAL (src) >= -32768
2863 && INTVAL (src) <= 32767);
2866 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2868 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2869 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2870 need to fix it if the input value is CONST_OK_FOR_I08. */
2876 if (GET_CODE (insn) == INSN)
2878 rtx pat = PATTERN (insn);
2879 if (GET_CODE (pat) == PARALLEL)
2880 pat = XVECEXP (pat, 0, 0);
2881 if (GET_CODE (pat) == SET
2882 /* We can load any 8 bit value if we don't care what the high
2883 order bits end up as. */
2884 && GET_MODE (SET_DEST (pat)) != QImode
2885 && (CONSTANT_P (SET_SRC (pat))
2886 /* Match mova_const. */
2887 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2888 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2889 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2891 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2892 && (fp_zero_operand (SET_SRC (pat))
2893 || fp_one_operand (SET_SRC (pat)))
2894 /* ??? If this is a -m4 or -m4-single compilation, in general
2895 we don't know the current setting of fpscr, so disable fldi.
2896 There is an exception if this was a register-register move
2897 before reload - and hence it was ascertained that we have
2898 single precision setting - and in a post-reload optimization
2899 we changed this to do a constant load. In that case
2900 we don't have an r0 clobber, hence we must use fldi. */
2901 && (! TARGET_SH4 || TARGET_FMOVD
2902 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2904 && GET_CODE (SET_DEST (pat)) == REG
2905 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2906 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2907 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
2918 return (GET_CODE (insn) == INSN
2919 && GET_CODE (PATTERN (insn)) == SET
2920 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2921 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2922 /* Don't match mova_const. */
2923 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2926 /* Find the last barrier from insn FROM which is close enough to hold the
2927 constant pool. If we can't find one, then create one near the end of
2931 find_barrier (num_mova, mova, from)
2942 int leading_mova = num_mova;
2943 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2947 /* For HImode: range is 510, add 4 because pc counts from address of
2948 second instruction after this one, subtract 2 for the jump instruction
2949 that we may need to emit before the table, subtract 2 for the instruction
2950 that fills the jump delay slot (in very rare cases, reorg will take an
2951 instruction from after the constant pool or will leave the delay slot
2952 empty). This gives 510.
2953 For SImode: range is 1020, add 4 because pc counts from address of
2954 second instruction after this one, subtract 2 in case pc is 2 byte
2955 aligned, subtract 2 for the jump instruction that we may need to emit
2956 before the table, subtract 2 for the instruction that fills the jump
2957 delay slot. This gives 1018. */
2959 /* The branch will always be shortened now that the reference address for
2960 forward branches is the successor address, thus we need no longer make
2961 adjustments to the [sh]i_limit for -O0. */
2966 while (from && count_si < si_limit && count_hi < hi_limit)
2968 int inc = get_attr_length (from);
2971 if (GET_CODE (from) == CODE_LABEL)
2974 new_align = 1 << label_to_alignment (from);
2975 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2976 new_align = 1 << barrier_align (from);
2982 if (GET_CODE (from) == BARRIER)
2985 found_barrier = from;
2987 /* If we are at the end of the function, or in front of an alignment
2988 instruction, we need not insert an extra alignment. We prefer
2989 this kind of barrier. */
2990 if (barrier_align (from) > 2)
2991 good_barrier = from;
2994 if (broken_move (from))
2997 enum machine_mode mode;
2999 pat = PATTERN (from);
3000 if (GET_CODE (pat) == PARALLEL)
3001 pat = XVECEXP (pat, 0, 0);
3002 src = SET_SRC (pat);
3003 dst = SET_DEST (pat);
3004 mode = GET_MODE (dst);
3006 /* We must explicitly check the mode, because sometimes the
3007 front end will generate code to load unsigned constants into
3008 HImode targets without properly sign extending them. */
3010 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3013 /* We put the short constants before the long constants, so
3014 we must count the length of short constants in the range
3015 for the long constants. */
3016 /* ??? This isn't optimal, but is easy to do. */
3021 /* We dump DF/DI constants before SF/SI ones, because
3022 the limit is the same, but the alignment requirements
3023 are higher. We may waste up to 4 additional bytes
3024 for alignment, and the DF/DI constant may have
3025 another SF/SI constant placed before it. */
3026 if (TARGET_SHCOMPACT
3028 && (mode == DFmode || mode == DImode))
3033 while (si_align > 2 && found_si + si_align - 2 > count_si)
3035 if (found_si > count_si)
3036 count_si = found_si;
3037 found_si += GET_MODE_SIZE (mode);
3039 si_limit -= GET_MODE_SIZE (mode);
3042 /* See the code in machine_dependent_reorg, which has a similar if
3043 statement that generates a new mova insn in many cases. */
3044 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3054 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3056 if (found_si > count_si)
3057 count_si = found_si;
3059 else if (GET_CODE (from) == JUMP_INSN
3060 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3061 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3065 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3067 /* We have just passed the barrier in front of the
3068 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3069 the ADDR_DIFF_VEC is accessed as data, just like our pool
3070 constants, this is a good opportunity to accommodate what
3071 we have gathered so far.
3072 If we waited any longer, we could end up at a barrier in
3073 front of code, which gives worse cache usage for separated
3074 instruction / data caches. */
3075 good_barrier = found_barrier;
3080 rtx body = PATTERN (from);
3081 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3084 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3085 else if (GET_CODE (from) == JUMP_INSN
3087 && ! TARGET_SMALLCODE)
3093 if (new_align > si_align)
3095 si_limit -= (count_si - 1) & (new_align - si_align);
3096 si_align = new_align;
3098 count_si = (count_si + new_align - 1) & -new_align;
3103 if (new_align > hi_align)
3105 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3106 hi_align = new_align;
3108 count_hi = (count_hi + new_align - 1) & -new_align;
3110 from = NEXT_INSN (from);
3117 /* Try as we might, the leading mova is out of range. Change
3118 it into a load (which will become a pcload) and retry. */
3119 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3120 INSN_CODE (mova) = -1;
3121 return find_barrier (0, 0, mova);
3125 /* Insert the constant pool table before the mova instruction,
3126 to prevent the mova label reference from going out of range. */
3128 good_barrier = found_barrier = barrier_before_mova;
3134 if (good_barrier && next_real_insn (found_barrier))
3135 found_barrier = good_barrier;
3139 /* We didn't find a barrier in time to dump our stuff,
3140 so we'll make one. */
3141 rtx label = gen_label_rtx ();
3143 /* If we exceeded the range, then we must back up over the last
3144 instruction we looked at. Otherwise, we just need to undo the
3145 NEXT_INSN at the end of the loop. */
3146 if (count_hi > hi_limit || count_si > si_limit)
3147 from = PREV_INSN (PREV_INSN (from));
3149 from = PREV_INSN (from);
3151 /* Walk back to be just before any jump or label.
3152 Putting it before a label reduces the number of times the branch
3153 around the constant pool table will be hit. Putting it before
3154 a jump makes it more likely that the bra delay slot will be
3156 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3157 || GET_CODE (from) == CODE_LABEL)
3158 from = PREV_INSN (from);
3160 from = emit_jump_insn_after (gen_jump (label), from);
3161 JUMP_LABEL (from) = label;
3162 LABEL_NUSES (label) = 1;
3163 found_barrier = emit_barrier_after (from);
3164 emit_label_after (label, found_barrier);
3167 return found_barrier;
3170 /* If the instruction INSN is implemented by a special function, and we can
3171 positively find the register that is used to call the sfunc, and this
3172 register is not used anywhere else in this instruction - except as the
3173 destination of a set, return this register; else, return 0. */
3175 sfunc_uses_reg (insn)
3179 rtx pattern, part, reg_part, reg;
3181 if (GET_CODE (insn) != INSN)
3183 pattern = PATTERN (insn);
3184 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3187 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3189 part = XVECEXP (pattern, 0, i);
3190 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3195 reg = XEXP (reg_part, 0);
3196 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3198 part = XVECEXP (pattern, 0, i);
3199 if (part == reg_part || GET_CODE (part) == CLOBBER)
3201 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3202 && GET_CODE (SET_DEST (part)) == REG)
3203 ? SET_SRC (part) : part)))
3209 /* See if the only way in which INSN uses REG is by calling it, or by
3210 setting it while calling it. Set *SET to a SET rtx if the register
3214 noncall_uses_reg (reg, insn, set)
3223 reg2 = sfunc_uses_reg (insn);
3224 if (reg2 && REGNO (reg2) == REGNO (reg))
3226 pattern = single_set (insn);
3228 && GET_CODE (SET_DEST (pattern)) == REG
3229 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3233 if (GET_CODE (insn) != CALL_INSN)
3235 /* We don't use rtx_equal_p because we don't care if the mode is
3237 pattern = single_set (insn);
3239 && GET_CODE (SET_DEST (pattern)) == REG
3240 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3246 par = PATTERN (insn);
3247 if (GET_CODE (par) == PARALLEL)
3248 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3250 part = XVECEXP (par, 0, i);
3251 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3254 return reg_mentioned_p (reg, SET_SRC (pattern));
3260 pattern = PATTERN (insn);
3262 if (GET_CODE (pattern) == PARALLEL)
3266 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3267 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3269 pattern = XVECEXP (pattern, 0, 0);
3272 if (GET_CODE (pattern) == SET)
3274 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3276 /* We don't use rtx_equal_p, because we don't care if the
3277 mode is different. */
3278 if (GET_CODE (SET_DEST (pattern)) != REG
3279 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3285 pattern = SET_SRC (pattern);
3288 if (GET_CODE (pattern) != CALL
3289 || GET_CODE (XEXP (pattern, 0)) != MEM
3290 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3296 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3297 general registers. Bits 0..15 mean that the respective registers
3298 are used as inputs in the instruction. Bits 16..31 mean that the
3299 registers 0..15, respectively, are used as outputs, or are clobbered.
3300 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3302 regs_used (x, is_dest)
3311 code = GET_CODE (x);
3316 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3317 << (REGNO (x) + is_dest));
3321 rtx y = SUBREG_REG (x);
3323 if (GET_CODE (y) != REG)
3326 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3328 subreg_regno_offset (REGNO (y),
3331 GET_MODE (x)) + is_dest));
3335 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3337 /* If there was a return value, it must have been indicated with USE. */
3352 fmt = GET_RTX_FORMAT (code);
3354 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3359 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3360 used |= regs_used (XVECEXP (x, i, j), is_dest);
3362 else if (fmt[i] == 'e')
3363 used |= regs_used (XEXP (x, i), is_dest);
3368 /* Create an instruction that prevents redirection of a conditional branch
3369 to the destination of the JUMP with address ADDR.
3370 If the branch needs to be implemented as an indirect jump, try to find
3371 a scratch register for it.
3372 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3373 If any preceding insn that doesn't fit into a delay slot is good enough,
3374 pass 1. Pass 2 if a definite blocking insn is needed.
3375 -1 is used internally to avoid deep recursion.
3376 If a blocking instruction is made or recognized, return it. */
3379 gen_block_redirect (jump, addr, need_block)
3381 int addr, need_block;
3384 rtx prev = prev_nonnote_insn (jump);
3387 /* First, check if we already have an instruction that satisfies our need. */
3388 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3390 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3392 if (GET_CODE (PATTERN (prev)) == USE
3393 || GET_CODE (PATTERN (prev)) == CLOBBER
3394 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3396 else if ((need_block &= ~1) < 0)
3398 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3401 /* We can't use JUMP_LABEL here because it might be undefined
3402 when not optimizing. */
3403 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3404 /* If the branch is out of range, try to find a scratch register for it. */
3406 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3410 /* Don't look for the stack pointer as a scratch register,
3411 it would cause trouble if an interrupt occurred. */
3412 unsigned try = 0x7fff, used;
3413 int jump_left = flag_expensive_optimizations + 1;
3415 /* It is likely that the most recent eligible instruction is wanted for
3416 the delay slot. Therefore, find out which registers it uses, and
3417 try to avoid using them. */
3419 for (scan = jump; (scan = PREV_INSN (scan)); )
3423 if (INSN_DELETED_P (scan))
3425 code = GET_CODE (scan);
3426 if (code == CODE_LABEL || code == JUMP_INSN)
3429 && GET_CODE (PATTERN (scan)) != USE
3430 && GET_CODE (PATTERN (scan)) != CLOBBER
3431 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3433 try &= ~regs_used (PATTERN (scan), 0);
3437 for (used = dead = 0, scan = JUMP_LABEL (jump);
3438 (scan = NEXT_INSN (scan)); )
3442 if (INSN_DELETED_P (scan))
3444 code = GET_CODE (scan);
3445 if (GET_RTX_CLASS (code) == 'i')
3447 used |= regs_used (PATTERN (scan), 0);
3448 if (code == CALL_INSN)
3449 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3450 dead |= (used >> 16) & ~used;
3456 if (code == JUMP_INSN)
3458 if (jump_left-- && simplejump_p (scan))
3459 scan = JUMP_LABEL (scan);
3465 /* Mask out the stack pointer again, in case it was
3466 the only 'free' register we have found. */
3469 /* If the immediate destination is still in range, check for possible
3470 threading with a jump beyond the delay slot insn.
3471 Don't check if we are called recursively; the jump has been or will be
3472 checked in a different invocation then. */
3474 else if (optimize && need_block >= 0)
3476 rtx next = next_active_insn (next_active_insn (dest));
3477 if (next && GET_CODE (next) == JUMP_INSN
3478 && GET_CODE (PATTERN (next)) == SET
3479 && recog_memoized (next) == CODE_FOR_jump_compact)
3481 dest = JUMP_LABEL (next);
3483 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3485 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3491 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3493 /* It would be nice if we could convert the jump into an indirect
3494 jump / far branch right now, and thus exposing all constituent
3495 instructions to further optimization. However, reorg uses
3496 simplejump_p to determine if there is an unconditional jump where
3497 it should try to schedule instructions from the target of the
3498 branch; simplejump_p fails for indirect jumps even if they have
3500 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3501 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3503 /* ??? We would like this to have the scope of the jump, but that
3504 scope will change when a delay slot insn of an inner scope is added.
3505 Hence, after delay slot scheduling, we'll have to expect
3506 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3509 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3510 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3513 else if (need_block)
3514 /* We can't use JUMP_LABEL here because it might be undefined
3515 when not optimizing. */
3516 return emit_insn_before (gen_block_branch_redirect
3517 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3522 #define CONDJUMP_MIN -252
3523 #define CONDJUMP_MAX 262
3526 /* A label (to be placed) in front of the jump
3527 that jumps to our ultimate destination. */
3529 /* Where we are going to insert it if we cannot move the jump any farther,
3530 or the jump itself if we have picked up an existing jump. */
3532 /* The ultimate destination. */
3534 struct far_branch *prev;
3535 /* If the branch has already been created, its address;
3536 else the address of its first prospective user. */
3540 static void gen_far_branch PARAMS ((struct far_branch *));
3541 enum mdep_reorg_phase_e mdep_reorg_phase;
3544 struct far_branch *bp;
3546 rtx insn = bp->insert_place;
3548 rtx label = gen_label_rtx ();
3550 emit_label_after (label, insn);
3553 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3554 LABEL_NUSES (bp->far_label)++;
3557 jump = emit_jump_insn_after (gen_return (), insn);
3558 /* Emit a barrier so that reorg knows that any following instructions
3559 are not reachable via a fall-through path.
3560 But don't do this when not optimizing, since we wouldn't supress the
3561 alignment for the barrier then, and could end up with out-of-range
3562 pc-relative loads. */
3564 emit_barrier_after (jump);
3565 emit_label_after (bp->near_label, insn);
3566 JUMP_LABEL (jump) = bp->far_label;
3567 if (! invert_jump (insn, label, 1))
3570 (gen_stuff_delay_slot
3571 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3572 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3574 /* Prevent reorg from undoing our splits. */
3575 gen_block_redirect (jump, bp->address += 2, 2);
3578 /* Fix up ADDR_DIFF_VECs. */
3580 fixup_addr_diff_vecs (first)
3585 for (insn = first; insn; insn = NEXT_INSN (insn))
3587 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3589 if (GET_CODE (insn) != JUMP_INSN
3590 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3592 pat = PATTERN (insn);
3593 vec_lab = XEXP (XEXP (pat, 0), 0);
3595 /* Search the matching casesi_jump_2. */
3596 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3598 if (GET_CODE (prev) != JUMP_INSN)
3600 prevpat = PATTERN (prev);
3601 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3603 x = XVECEXP (prevpat, 0, 1);
3604 if (GET_CODE (x) != USE)
3607 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3611 /* Emit the reference label of the braf where it belongs, right after
3612 the casesi_jump_2 (i.e. braf). */
3613 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3614 emit_label_after (braf_label, prev);
3616 /* Fix up the ADDR_DIF_VEC to be relative
3617 to the reference address of the braf. */
3618 XEXP (XEXP (pat, 0), 0) = braf_label;
3622 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3623 a barrier. Return the base 2 logarithm of the desired alignment. */
3625 barrier_align (barrier_or_label)
3626 rtx barrier_or_label;
3628 rtx next = next_real_insn (barrier_or_label), pat, prev;
3629 int slot, credit, jump_to_next = 0;
3634 pat = PATTERN (next);
3636 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3639 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3640 /* This is a barrier in front of a constant table. */
3643 prev = prev_real_insn (barrier_or_label);
3644 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3646 pat = PATTERN (prev);
3647 /* If this is a very small table, we want to keep the alignment after
3648 the table to the minimum for proper code alignment. */
3649 return ((TARGET_SMALLCODE
3650 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3651 <= (unsigned)1 << (CACHE_LOG - 2)))
3652 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3655 if (TARGET_SMALLCODE)
3658 if (! TARGET_SH2 || ! optimize)
3659 return align_jumps_log;
3661 /* When fixing up pcloads, a constant table might be inserted just before
3662 the basic block that ends with the barrier. Thus, we can't trust the
3663 instruction lengths before that. */
3664 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3666 /* Check if there is an immediately preceding branch to the insn beyond
3667 the barrier. We must weight the cost of discarding useful information
3668 from the current cache line when executing this branch and there is
3669 an alignment, against that of fetching unneeded insn in front of the
3670 branch target when there is no alignment. */
3672 /* There are two delay_slot cases to consider. One is the simple case
3673 where the preceding branch is to the insn beyond the barrier (simple
3674 delay slot filling), and the other is where the preceding branch has
3675 a delay slot that is a duplicate of the insn after the barrier
3676 (fill_eager_delay_slots) and the branch is to the insn after the insn
3677 after the barrier. */
3679 /* PREV is presumed to be the JUMP_INSN for the barrier under
3680 investigation. Skip to the insn before it. */
3681 prev = prev_real_insn (prev);
3683 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3684 credit >= 0 && prev && GET_CODE (prev) == INSN;
3685 prev = prev_real_insn (prev))
3688 if (GET_CODE (PATTERN (prev)) == USE
3689 || GET_CODE (PATTERN (prev)) == CLOBBER)
3691 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3693 prev = XVECEXP (PATTERN (prev), 0, 1);
3694 if (INSN_UID (prev) == INSN_UID (next))
3696 /* Delay slot was filled with insn at jump target. */
3703 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3705 credit -= get_attr_length (prev);
3708 && GET_CODE (prev) == JUMP_INSN
3709 && JUMP_LABEL (prev))
3713 || next_real_insn (JUMP_LABEL (prev)) == next
3714 /* If relax_delay_slots() decides NEXT was redundant
3715 with some previous instruction, it will have
3716 redirected PREV's jump to the following insn. */
3717 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3718 /* There is no upper bound on redundant instructions
3719 that might have been skipped, but we must not put an
3720 alignment where none had been before. */
3721 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3723 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3724 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3725 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3727 rtx pat = PATTERN (prev);
3728 if (GET_CODE (pat) == PARALLEL)
3729 pat = XVECEXP (pat, 0, 0);
3730 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3736 return align_jumps_log;
3739 /* If we are inside a phony loop, almost any kind of label can turn up as the
3740 first one in the loop. Aligning a braf label causes incorrect switch
3741 destination addresses; we can detect braf labels because they are
3742 followed by a BARRIER.
3743 Applying loop alignment to small constant or switch tables is a waste
3744 of space, so we suppress this too. */
3746 sh_loop_align (label)
3752 next = next_nonnote_insn (next);
3753 while (next && GET_CODE (next) == CODE_LABEL);
3757 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3758 || recog_memoized (next) == CODE_FOR_consttable_2)
3761 return align_loops_log;
3764 /* Do a final pass over the function, just before delayed branch
3770 rtx first, insn, mova = NULL_RTX;
3772 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3773 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3775 first = get_insns ();
3777 /* We must split call insns before introducing `mova's. If we're
3778 optimizing, they'll have already been split. Otherwise, make
3779 sure we don't split them too late. */
3781 split_all_insns_noflow ();
3786 /* If relaxing, generate pseudo-ops to associate function calls with
3787 the symbols they call. It does no harm to not generate these
3788 pseudo-ops. However, when we can generate them, it enables to
3789 linker to potentially relax the jsr to a bsr, and eliminate the
3790 register load and, possibly, the constant pool entry. */
3792 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3795 /* Remove all REG_LABEL notes. We want to use them for our own
3796 purposes. This works because none of the remaining passes
3797 need to look at them.
3799 ??? But it may break in the future. We should use a machine
3800 dependent REG_NOTE, or some other approach entirely. */
3801 for (insn = first; insn; insn = NEXT_INSN (insn))
3807 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3808 remove_note (insn, note);
3812 for (insn = first; insn; insn = NEXT_INSN (insn))
3814 rtx pattern, reg, link, set, scan, dies, label;
3815 int rescan = 0, foundinsn = 0;
3817 if (GET_CODE (insn) == CALL_INSN)
3819 pattern = PATTERN (insn);
3821 if (GET_CODE (pattern) == PARALLEL)
3822 pattern = XVECEXP (pattern, 0, 0);
3823 if (GET_CODE (pattern) == SET)
3824 pattern = SET_SRC (pattern);
3826 if (GET_CODE (pattern) != CALL
3827 || GET_CODE (XEXP (pattern, 0)) != MEM)
3830 reg = XEXP (XEXP (pattern, 0), 0);
3834 reg = sfunc_uses_reg (insn);
3839 if (GET_CODE (reg) != REG)
3842 /* This is a function call via REG. If the only uses of REG
3843 between the time that it is set and the time that it dies
3844 are in function calls, then we can associate all the
3845 function calls with the setting of REG. */
3847 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3849 if (REG_NOTE_KIND (link) != 0)
3851 set = single_set (XEXP (link, 0));
3852 if (set && rtx_equal_p (reg, SET_DEST (set)))
3854 link = XEXP (link, 0);
3861 /* ??? Sometimes global register allocation will have
3862 deleted the insn pointed to by LOG_LINKS. Try
3863 scanning backward to find where the register is set. */
3864 for (scan = PREV_INSN (insn);
3865 scan && GET_CODE (scan) != CODE_LABEL;
3866 scan = PREV_INSN (scan))
3868 if (! INSN_P (scan))
3871 if (! reg_mentioned_p (reg, scan))
3874 if (noncall_uses_reg (reg, scan, &set))
3888 /* The register is set at LINK. */
3890 /* We can only optimize the function call if the register is
3891 being set to a symbol. In theory, we could sometimes
3892 optimize calls to a constant location, but the assembler
3893 and linker do not support that at present. */
3894 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3895 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3898 /* Scan forward from LINK to the place where REG dies, and
3899 make sure that the only insns which use REG are
3900 themselves function calls. */
3902 /* ??? This doesn't work for call targets that were allocated
3903 by reload, since there may not be a REG_DEAD note for the
3907 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3911 /* Don't try to trace forward past a CODE_LABEL if we haven't
3912 seen INSN yet. Ordinarily, we will only find the setting insn
3913 in LOG_LINKS if it is in the same basic block. However,
3914 cross-jumping can insert code labels in between the load and
3915 the call, and can result in situations where a single call
3916 insn may have two targets depending on where we came from. */
3918 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3921 if (! INSN_P (scan))
3924 /* Don't try to trace forward past a JUMP. To optimize
3925 safely, we would have to check that all the
3926 instructions at the jump destination did not use REG. */
3928 if (GET_CODE (scan) == JUMP_INSN)
3931 if (! reg_mentioned_p (reg, scan))
3934 if (noncall_uses_reg (reg, scan, &scanset))
3941 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3943 /* There is a function call to this register other
3944 than the one we are checking. If we optimize
3945 this call, we need to rescan again below. */
3949 /* ??? We shouldn't have to worry about SCANSET here.
3950 We should just be able to check for a REG_DEAD note
3951 on a function call. However, the REG_DEAD notes are
3952 apparently not dependable around libcalls; c-torture
3953 execute/920501-2 is a test case. If SCANSET is set,
3954 then this insn sets the register, so it must have
3955 died earlier. Unfortunately, this will only handle
3956 the cases in which the register is, in fact, set in a
3959 /* ??? We shouldn't have to use FOUNDINSN here.
3960 However, the LOG_LINKS fields are apparently not
3961 entirely reliable around libcalls;
3962 newlib/libm/math/e_pow.c is a test case. Sometimes
3963 an insn will appear in LOG_LINKS even though it is
3964 not the most recent insn which sets the register. */
3968 || find_reg_note (scan, REG_DEAD, reg)))
3977 /* Either there was a branch, or some insn used REG
3978 other than as a function call address. */
3982 /* Create a code label, and put it in a REG_LABEL note on
3983 the insn which sets the register, and on each call insn
3984 which uses the register. In final_prescan_insn we look
3985 for the REG_LABEL notes, and output the appropriate label
3988 label = gen_label_rtx ();
3989 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3991 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4000 scan = NEXT_INSN (scan);
4002 && ((GET_CODE (scan) == CALL_INSN
4003 && reg_mentioned_p (reg, scan))
4004 || ((reg2 = sfunc_uses_reg (scan))
4005 && REGNO (reg2) == REGNO (reg))))
4007 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4009 while (scan != dies);
4015 fixup_addr_diff_vecs (first);
4019 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4020 shorten_branches (first);
4022 /* Scan the function looking for move instructions which have to be
4023 changed to pc-relative loads and insert the literal tables. */
4025 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4026 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4033 else if (GET_CODE (insn) == JUMP_INSN
4034 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4042 /* Some code might have been inserted between the mova and
4043 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4044 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4045 total += get_attr_length (scan);
4047 /* range of mova is 1020, add 4 because pc counts from address of
4048 second instruction after this one, subtract 2 in case pc is 2
4049 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4050 cancels out with alignment effects of the mova itself. */
4053 /* Change the mova into a load, and restart scanning
4054 there. broken_move will then return true for mova. */
4055 SET_SRC (PATTERN (mova))
4056 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4057 INSN_CODE (mova) = -1;
4061 if (broken_move (insn))
4064 /* Scan ahead looking for a barrier to stick the constant table
4066 rtx barrier = find_barrier (num_mova, mova, insn);
4067 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4069 if (num_mova && ! mova_p (mova))
4071 /* find_barrier had to change the first mova into a
4072 pcload; thus, we have to start with this new pcload. */
4076 /* Now find all the moves between the points and modify them. */
4077 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4079 if (GET_CODE (scan) == CODE_LABEL)
4081 if (broken_move (scan))
4083 rtx *patp = &PATTERN (scan), pat = *patp;
4087 enum machine_mode mode;
4089 if (GET_CODE (pat) == PARALLEL)
4090 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4091 src = SET_SRC (pat);
4092 dst = SET_DEST (pat);
4093 mode = GET_MODE (dst);
4095 if (mode == SImode && hi_const (src)
4096 && REGNO (dst) != FPUL_REG)
4101 while (GET_CODE (dst) == SUBREG)
4103 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4104 GET_MODE (SUBREG_REG (dst)),
4107 dst = SUBREG_REG (dst);
4109 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4112 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4114 /* This must be an insn that clobbers r0. */
4115 rtx clobber = XVECEXP (PATTERN (scan), 0,
4116 XVECLEN (PATTERN (scan), 0) - 1);
4118 if (GET_CODE (clobber) != CLOBBER
4119 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4123 && reg_set_between_p (r0_rtx, last_float_move, scan))
4127 && GET_MODE_SIZE (mode) != 4
4128 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4130 lab = add_constant (src, mode, last_float);
4132 emit_insn_before (gen_mova (lab), scan);
4135 /* There will be a REG_UNUSED note for r0 on
4136 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4137 lest reorg:mark_target_live_regs will not
4138 consider r0 to be used, and we end up with delay
4139 slot insn in front of SCAN that clobbers r0. */
4141 = find_regno_note (last_float_move, REG_UNUSED, 0);
4143 /* If we are not optimizing, then there may not be
4146 PUT_MODE (note, REG_INC);
4148 *last_float_addr = r0_inc_rtx;
4150 last_float_move = scan;
4152 newsrc = gen_rtx (MEM, mode,
4153 (((TARGET_SH4 && ! TARGET_FMOVD)
4154 || REGNO (dst) == FPUL_REG)
4157 last_float_addr = &XEXP (newsrc, 0);
4159 /* Remove the clobber of r0. */
4160 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4161 RTX_UNCHANGING_P (newsrc) = 1;
4163 /* This is a mova needing a label. Create it. */
4164 else if (GET_CODE (src) == UNSPEC
4165 && XINT (src, 1) == UNSPEC_MOVA
4166 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4168 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4169 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4170 newsrc = gen_rtx_UNSPEC (SImode,
4171 gen_rtvec (1, newsrc),
4176 lab = add_constant (src, mode, 0);
4177 newsrc = gen_rtx_MEM (mode,
4178 gen_rtx_LABEL_REF (VOIDmode, lab));
4179 RTX_UNCHANGING_P (newsrc) = 1;
4181 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4182 INSN_CODE (scan) = -1;
4185 dump_table (barrier);
4190 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4191 INSN_ADDRESSES_FREE ();
4192 split_branches (first);
4194 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4195 also has an effect on the register that holds the address of the sfunc.
4196 Insert an extra dummy insn in front of each sfunc that pretends to
4197 use this register. */
4198 if (flag_delayed_branch)
4200 for (insn = first; insn; insn = NEXT_INSN (insn))
4202 rtx reg = sfunc_uses_reg (insn);
4206 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4210 /* fpscr is not actually a user variable, but we pretend it is for the
4211 sake of the previous optimization passes, since we want it handled like
4212 one. However, we don't have any debugging information for it, so turn
4213 it into a non-user variable now. */
4215 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4217 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4221 get_dest_uid (label, max_uid)
4225 rtx dest = next_real_insn (label);
4228 /* This can happen for an undefined label. */
4230 dest_uid = INSN_UID (dest);
4231 /* If this is a newly created branch redirection blocking instruction,
4232 we cannot index the branch_uid or insn_addresses arrays with its
4233 uid. But then, we won't need to, because the actual destination is
4234 the following branch. */
4235 while (dest_uid >= max_uid)
4237 dest = NEXT_INSN (dest);
4238 dest_uid = INSN_UID (dest);
4240 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4245 /* Split condbranches that are out of range. Also add clobbers for
4246 scratch registers that are needed in far jumps.
4247 We do this before delay slot scheduling, so that it can take our
4248 newly created instructions into account. It also allows us to
4249 find branches with common targets more easily. */
4252 split_branches (first)
4256 struct far_branch **uid_branch, *far_branch_list = 0;
4257 int max_uid = get_max_uid ();
4259 /* Find out which branches are out of range. */
4260 shorten_branches (first);
4262 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4263 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4265 for (insn = first; insn; insn = NEXT_INSN (insn))
4266 if (! INSN_P (insn))
4268 else if (INSN_DELETED_P (insn))
4270 /* Shorten_branches would split this instruction again,
4271 so transform it into a note. */
4272 PUT_CODE (insn, NOTE);
4273 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4274 NOTE_SOURCE_FILE (insn) = 0;
4276 else if (GET_CODE (insn) == JUMP_INSN
4277 /* Don't mess with ADDR_DIFF_VEC */
4278 && (GET_CODE (PATTERN (insn)) == SET
4279 || GET_CODE (PATTERN (insn)) == RETURN))
4281 enum attr_type type = get_attr_type (insn);
4282 if (type == TYPE_CBRANCH)
4286 if (get_attr_length (insn) > 4)
4288 rtx src = SET_SRC (PATTERN (insn));
4289 rtx olabel = XEXP (XEXP (src, 1), 0);
4290 int addr = INSN_ADDRESSES (INSN_UID (insn));
4292 int dest_uid = get_dest_uid (olabel, max_uid);
4293 struct far_branch *bp = uid_branch[dest_uid];
4295 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4296 the label if the LABEL_NUSES count drops to zero. There is
4297 always a jump_optimize pass that sets these values, but it
4298 proceeds to delete unreferenced code, and then if not
4299 optimizing, to un-delete the deleted instructions, thus
4300 leaving labels with too low uses counts. */
4303 JUMP_LABEL (insn) = olabel;
4304 LABEL_NUSES (olabel)++;
4308 bp = (struct far_branch *) alloca (sizeof *bp);
4309 uid_branch[dest_uid] = bp;
4310 bp->prev = far_branch_list;
4311 far_branch_list = bp;
4313 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4314 LABEL_NUSES (bp->far_label)++;
4318 label = bp->near_label;
4319 if (! label && bp->address - addr >= CONDJUMP_MIN)
4321 rtx block = bp->insert_place;
4323 if (GET_CODE (PATTERN (block)) == RETURN)
4324 block = PREV_INSN (block);
4326 block = gen_block_redirect (block,
4328 label = emit_label_after (gen_label_rtx (),
4330 bp->near_label = label;
4332 else if (label && ! NEXT_INSN (label))
4334 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4335 bp->insert_place = insn;
4337 gen_far_branch (bp);
4341 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4343 bp->near_label = label = gen_label_rtx ();
4344 bp->insert_place = insn;
4347 if (! redirect_jump (insn, label, 1))
4352 /* get_attr_length (insn) == 2 */
4353 /* Check if we have a pattern where reorg wants to redirect
4354 the branch to a label from an unconditional branch that
4356 /* We can't use JUMP_LABEL here because it might be undefined
4357 when not optimizing. */
4358 /* A syntax error might cause beyond to be NULL_RTX. */
4360 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4364 && (GET_CODE (beyond) == JUMP_INSN
4365 || ((beyond = next_active_insn (beyond))
4366 && GET_CODE (beyond) == JUMP_INSN))
4367 && GET_CODE (PATTERN (beyond)) == SET
4368 && recog_memoized (beyond) == CODE_FOR_jump_compact
4370 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4371 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4373 gen_block_redirect (beyond,
4374 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4377 next = next_active_insn (insn);
4379 if ((GET_CODE (next) == JUMP_INSN
4380 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4381 && GET_CODE (PATTERN (next)) == SET
4382 && recog_memoized (next) == CODE_FOR_jump_compact
4384 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4385 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4387 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4389 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4391 int addr = INSN_ADDRESSES (INSN_UID (insn));
4394 struct far_branch *bp;
4396 if (type == TYPE_JUMP)
4398 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4399 dest_uid = get_dest_uid (far_label, max_uid);
4402 /* Parse errors can lead to labels outside
4404 if (! NEXT_INSN (far_label))
4409 JUMP_LABEL (insn) = far_label;
4410 LABEL_NUSES (far_label)++;
4412 redirect_jump (insn, NULL_RTX, 1);
4416 bp = uid_branch[dest_uid];
4419 bp = (struct far_branch *) alloca (sizeof *bp);
4420 uid_branch[dest_uid] = bp;
4421 bp->prev = far_branch_list;
4422 far_branch_list = bp;
4424 bp->far_label = far_label;
4426 LABEL_NUSES (far_label)++;
4428 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4429 if (addr - bp->address <= CONDJUMP_MAX)
4430 emit_label_after (bp->near_label, PREV_INSN (insn));
4433 gen_far_branch (bp);
4439 bp->insert_place = insn;
4441 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4443 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4446 /* Generate all pending far branches,
4447 and free our references to the far labels. */
4448 while (far_branch_list)
4450 if (far_branch_list->near_label
4451 && ! NEXT_INSN (far_branch_list->near_label))
4452 gen_far_branch (far_branch_list);
4454 && far_branch_list->far_label
4455 && ! --LABEL_NUSES (far_branch_list->far_label))
4456 delete_insn (far_branch_list->far_label);
4457 far_branch_list = far_branch_list->prev;
4460 /* Instruction length information is no longer valid due to the new
4461 instructions that have been generated. */
4462 init_insn_lengths ();
4465 /* Dump out instruction addresses, which is useful for debugging the
4466 constant pool table stuff.
4468 If relaxing, output the label and pseudo-ops used to link together
4469 calls and the instruction which set the registers. */
4471 /* ??? The addresses printed by this routine for insns are nonsense for
4472 insns which are inside of a sequence where none of the inner insns have
4473 variable length. This is because the second pass of shorten_branches
4474 does not bother to update them. */
4477 final_prescan_insn (insn, opvec, noperands)
4479 rtx *opvec ATTRIBUTE_UNUSED;
4480 int noperands ATTRIBUTE_UNUSED;
4482 if (TARGET_DUMPISIZE)
4483 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4489 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4494 pattern = PATTERN (insn);
4495 if (GET_CODE (pattern) == PARALLEL)
4496 pattern = XVECEXP (pattern, 0, 0);
4497 if (GET_CODE (pattern) == CALL
4498 || (GET_CODE (pattern) == SET
4499 && (GET_CODE (SET_SRC (pattern)) == CALL
4500 || get_attr_type (insn) == TYPE_SFUNC)))
4501 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4502 CODE_LABEL_NUMBER (XEXP (note, 0)));
4503 else if (GET_CODE (pattern) == SET)
4504 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4505 CODE_LABEL_NUMBER (XEXP (note, 0)));
4512 /* Dump out any constants accumulated in the final pass. These will
4516 output_jump_label_table ()
4522 fprintf (asm_out_file, "\t.align 2\n");
4523 for (i = 0; i < pool_size; i++)
4525 pool_node *p = &pool_vector[i];
4527 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4528 CODE_LABEL_NUMBER (p->label));
4529 output_asm_insn (".long %O0", &p->value);
4537 /* A full frame looks like:
4541 [ if current_function_anonymous_args
4554 local-0 <- fp points here. */
4556 /* Number of bytes pushed for anonymous args, used to pass information
4557 between expand_prologue and expand_epilogue. */
4559 static int extra_push;
4561 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4562 to be adjusted, and TEMP, if nonnegative, holds the register number
4563 of a general register that we may clobber. */
4566 output_stack_adjust (size, reg, temp, emit_fn)
4570 rtx (*emit_fn) PARAMS ((rtx));
4574 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4579 if (CONST_OK_FOR_ADD (size))
4580 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4581 /* Try to do it with two partial adjustments; however, we must make
4582 sure that the stack is properly aligned at all times, in case
4583 an interrupt occurs between the two partial adjustments. */
4584 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4585 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4587 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4588 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4595 /* If TEMP is invalid, we could temporarily save a general
4596 register to MACL. However, there is currently no need
4597 to handle this case, so just abort when we see it. */
4600 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4602 /* If SIZE is negative, subtract the positive value.
4603 This sometimes allows a constant pool entry to be shared
4604 between prologue and epilogue code. */
4607 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4608 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4612 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4613 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4615 if (emit_fn == frame_insn)
4617 = (gen_rtx_EXPR_LIST
4618 (REG_FRAME_RELATED_EXPR,
4619 gen_rtx_SET (VOIDmode, reg,
4620 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4631 RTX_FRAME_RELATED_P (x) = 1;
4635 /* Output RTL to push register RN onto the stack. */
4643 x = gen_push_fpul ();
4644 else if (rn == FPSCR_REG)
4645 x = gen_push_fpscr ();
4646 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4647 && FP_OR_XD_REGISTER_P (rn))
4649 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4651 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4653 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4654 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4656 x = gen_push (gen_rtx_REG (SImode, rn));
4660 = gen_rtx_EXPR_LIST (REG_INC,
4661 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4665 /* Output RTL to pop register RN from the stack. */
4673 x = gen_pop_fpul ();
4674 else if (rn == FPSCR_REG)
4675 x = gen_pop_fpscr ();
4676 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4677 && FP_OR_XD_REGISTER_P (rn))
4679 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4681 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4683 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4684 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4686 x = gen_pop (gen_rtx_REG (SImode, rn));
4690 = gen_rtx_EXPR_LIST (REG_INC,
4691 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4694 /* Generate code to push the regs specified in the mask. */
4697 push_regs (mask, interrupt_handler)
4699 int interrupt_handler;
4704 /* Push PR last; this gives better latencies after the prologue, and
4705 candidates for the return delay slot when there are no general
4706 registers pushed. */
4707 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4709 /* If this is an interrupt handler, and the SZ bit varies,
4710 and we have to push any floating point register, we need
4711 to switch to the correct precision first. */
4712 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4713 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4715 HARD_REG_SET unsaved;
4718 COMPL_HARD_REG_SET(unsaved, *mask);
4719 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4723 && (i != FPSCR_REG || ! skip_fpscr)
4724 && TEST_HARD_REG_BIT (*mask, i))
4727 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4731 /* Calculate how much extra space is needed to save all callee-saved
4733 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4736 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4739 int stack_space = 0;
4740 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4742 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4743 if ((! call_used_regs[reg] || interrupt_handler)
4744 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4745 /* Leave space to save this target register on the stack,
4746 in case target register allocation wants to use it. */
4747 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4751 /* Decide whether we should reserve space for callee-save target registers,
4752 in case target register allocation wants to use them. REGS_SAVED is
4753 the space, in bytes, that is already required for register saves.
4754 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4757 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4758 HARD_REG_SET *live_regs_mask)
4762 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4765 /* Decide how much space to reserve for callee-save target registers
4766 in case target register allocation wants to use them.
4767 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4770 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
4772 if (shmedia_space_reserved_for_target_registers)
4773 return shmedia_target_regs_stack_space (live_regs_mask);
4778 /* Work out the registers which need to be saved, both as a mask and a
4779 count of saved words. Return the count.
4781 If doing a pragma interrupt function, then push all regs used by the
4782 function, and if we call another function (we can tell by looking at PR),
4783 make sure that all the regs it clobbers are safe too. */
4786 calc_live_regs (live_regs_mask)
4787 HARD_REG_SET *live_regs_mask;
4791 int interrupt_handler;
4794 interrupt_handler = sh_cfun_interrupt_handler_p ();
4796 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4797 CLEAR_HARD_REG_SET (*live_regs_mask);
4798 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4799 && regs_ever_live[FPSCR_REG])
4800 target_flags &= ~FPU_SINGLE_BIT;
4801 /* If we can save a lot of saves by switching to double mode, do that. */
4802 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4803 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4804 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4805 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4808 target_flags &= ~FPU_SINGLE_BIT;
4811 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4812 knows how to use it. That means the pseudo originally allocated for
4813 the initial value can become the PR_MEDIA_REG hard register, as seen for
4814 execute/20010122-1.c:test9. */
4816 pr_live = regs_ever_live[PR_MEDIA_REG];
4819 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4820 pr_live = (pr_initial
4821 ? (GET_CODE (pr_initial) != REG
4822 || REGNO (pr_initial) != (PR_REG))
4823 : regs_ever_live[PR_REG]);
4825 /* Force PR to be live if the prologue has to call the SHmedia
4826 argument decoder or register saver. */
4827 if (TARGET_SHCOMPACT
4828 && ((current_function_args_info.call_cookie
4829 & ~ CALL_COOKIE_RET_TRAMP (1))
4830 || current_function_has_nonlocal_label))
4832 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4834 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4836 : (interrupt_handler && ! pragma_trapa)
4837 ? (/* Need to save all the regs ever live. */
4838 (regs_ever_live[reg]
4839 || (call_used_regs[reg]
4840 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4842 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4843 && reg != RETURN_ADDRESS_POINTER_REGNUM
4844 && reg != T_REG && reg != GBR_REG
4845 /* Push fpscr only on targets which have FPU */
4846 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4847 : (/* Only push those regs which are used and need to be saved. */
4850 && current_function_args_info.call_cookie
4851 && reg == PIC_OFFSET_TABLE_REGNUM)
4852 || (regs_ever_live[reg] && ! call_used_regs[reg])
4853 || (current_function_calls_eh_return
4854 && (reg == EH_RETURN_DATA_REGNO (0)
4855 || reg == EH_RETURN_DATA_REGNO (1)
4856 || reg == EH_RETURN_DATA_REGNO (2)
4857 || reg == EH_RETURN_DATA_REGNO (3)))))
4859 SET_HARD_REG_BIT (*live_regs_mask, reg);
4860 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4862 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4863 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4865 if (FP_REGISTER_P (reg))
4867 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4869 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4870 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4873 else if (XD_REGISTER_P (reg))
4875 /* Must switch to double mode to access these registers. */
4876 target_flags &= ~FPU_SINGLE_BIT;
4881 /* If we have a target register optimization pass after prologue / epilogue
4882 threading, we need to assume all target registers will be live even if
4884 if (flag_branch_target_load_optimize2
4885 && TARGET_SAVE_ALL_TARGET_REGS
4886 && shmedia_space_reserved_for_target_registers)
4887 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4888 if ((! call_used_regs[reg] || interrupt_handler)
4889 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4891 SET_HARD_REG_BIT (*live_regs_mask, reg);
4892 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4898 /* Code to generate prologue and epilogue sequences */
4900 /* PUSHED is the number of bytes that are being pushed on the
4901 stack for register saves. Return the frame size, padded
4902 appropriately so that the stack stays properly aligned. */
4903 static HOST_WIDE_INT
4904 rounded_frame_size (pushed)
4907 HOST_WIDE_INT size = get_frame_size ();
4908 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4910 return ((size + pushed + align - 1) & -align) - pushed;
4913 /* Choose a call-clobbered target-branch register that remains
4914 unchanged along the whole function. We set it up as the return
4915 value in the prologue. */
4917 sh_media_register_for_return ()
4922 if (! current_function_is_leaf)
4925 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4927 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4928 if (call_used_regs[regno] && ! regs_ever_live[regno])
4935 sh_expand_prologue ()
4937 HARD_REG_SET live_regs_mask;
4940 int save_flags = target_flags;
4942 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4944 /* We have pretend args if we had an object sent partially in registers
4945 and partially on the stack, e.g. a large structure. */
4946 output_stack_adjust (-current_function_pretend_args_size
4947 - current_function_args_info.stack_regs * 8,
4948 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4952 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4953 /* We're going to use the PIC register to load the address of the
4954 incoming-argument decoder and/or of the return trampoline from
4955 the GOT, so make sure the PIC register is preserved and
4957 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4959 if (TARGET_SHCOMPACT
4960 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4964 /* First, make all registers with incoming arguments that will
4965 be pushed onto the stack live, so that register renaming
4966 doesn't overwrite them. */
4967 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4968 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4969 >= NPARM_REGS (SImode) - reg)
4970 for (; reg < NPARM_REGS (SImode); reg++)
4971 emit_insn (gen_shcompact_preserve_incoming_args
4972 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4973 else if (CALL_COOKIE_INT_REG_GET
4974 (current_function_args_info.call_cookie, reg) == 1)
4975 emit_insn (gen_shcompact_preserve_incoming_args
4976 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4978 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4980 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4981 GEN_INT (current_function_args_info.call_cookie));
4982 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4983 gen_rtx_REG (SImode, R0_REG));
4985 else if (TARGET_SHMEDIA)
4987 int tr = sh_media_register_for_return ();
4991 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4992 gen_rtx_REG (DImode, PR_MEDIA_REG));
4994 /* If this function only exits with sibcalls, this copy
4995 will be flagged as dead. */
4996 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5002 /* Emit the code for SETUP_VARARGS. */
5003 if (current_function_stdarg)
5005 /* This is not used by the SH2E calling convention */
5006 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
5008 /* Push arg regs as if they'd been provided by caller in stack. */
5009 for (i = 0; i < NPARM_REGS(SImode); i++)
5011 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5014 if (i >= (NPARM_REGS(SImode)
5015 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5019 RTX_FRAME_RELATED_P (insn) = 0;
5025 /* If we're supposed to switch stacks at function entry, do so now. */
5027 emit_insn (gen_sp_switch_1 ());
5029 d = calc_live_regs (&live_regs_mask);
5030 /* ??? Maybe we could save some switching if we can move a mode switch
5031 that already happens to be at the function start into the prologue. */
5032 if (target_flags != save_flags && ! current_function_interrupt)
5033 emit_insn (gen_toggle_sz ());
5040 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5041 int offset_in_r0 = -1;
5043 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5044 int total_size, save_size;
5046 /* D is the actual number of bytes that we need for saving registers,
5047 however, in initial_elimination_offset we have committed to using
5048 an additional TREGS_SPACE amount of bytes - in order to keep both
5049 addresses to arguments supplied by the caller and local variables
5050 valid, we must keep this gap. Place it between the incoming
5051 arguments and the actually saved registers in a bid to optimize
5052 locality of reference. */
5053 total_size = d + tregs_space;
5054 total_size += rounded_frame_size (total_size);
5055 save_size = total_size - rounded_frame_size (d);
5056 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5057 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5058 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5060 /* If adjusting the stack in a single step costs nothing extra, do so.
5061 I.e. either if a single addi is enough, or we need a movi anyway,
5062 and we don't exceed the maximum offset range (the test for the
5063 latter is conservative for simplicity). */
5065 && (CONST_OK_FOR_I10 (-total_size)
5066 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5067 && total_size <= 2044)))
5068 d_rounding = total_size - save_size;
5070 offset = d + d_rounding;
5072 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5075 /* We loop twice: first, we save 8-byte aligned registers in the
5076 higher addresses, that are known to be aligned. Then, we
5077 proceed to saving 32-bit registers that don't need 8-byte
5079 /* Note that if you change this code in a way that affects where
5080 the return register is saved, you have to update not only
5081 sh_expand_epilogue, but also sh_set_return_address. */
5082 for (align = 1; align >= 0; align--)
5083 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5084 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5086 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5088 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5090 if (mode == SFmode && (i % 2) == 1
5091 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5092 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5099 /* If we're doing the aligned pass and this is not aligned,
5100 or we're doing the unaligned pass and this is aligned,
5102 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5106 offset -= GET_MODE_SIZE (mode);
5108 reg_rtx = gen_rtx_REG (mode, reg);
5110 mem_rtx = gen_rtx_MEM (mode,
5111 gen_rtx_PLUS (Pmode,
5115 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5121 if (HAVE_PRE_DECREMENT
5122 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5123 || mem_rtx == NULL_RTX
5124 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5126 pre_dec = gen_rtx_MEM (mode,
5127 gen_rtx_PRE_DEC (Pmode, r0));
5129 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5138 offset += GET_MODE_SIZE (mode);
5142 if (mem_rtx != NULL_RTX)
5145 if (offset_in_r0 == -1)
5147 emit_move_insn (r0, GEN_INT (offset));
5148 offset_in_r0 = offset;
5150 else if (offset != offset_in_r0)
5155 GEN_INT (offset - offset_in_r0)));
5156 offset_in_r0 += offset - offset_in_r0;
5159 if (pre_dec != NULL_RTX)
5165 (Pmode, r0, stack_pointer_rtx));
5169 offset -= GET_MODE_SIZE (mode);
5170 offset_in_r0 -= GET_MODE_SIZE (mode);
5175 mem_rtx = gen_rtx_MEM (mode, r0);
5177 mem_rtx = gen_rtx_MEM (mode,
5178 gen_rtx_PLUS (Pmode,
5182 /* We must not use an r0-based address for target-branch
5183 registers or for special registers without pre-dec
5184 memory addresses, since we store their values in r0
5186 if (TARGET_REGISTER_P (i)
5187 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5188 && mem_rtx != pre_dec))
5192 if (TARGET_REGISTER_P (i)
5193 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5194 && mem_rtx != pre_dec))
5196 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5198 emit_move_insn (r0mode, reg_rtx);
5206 emit_move_insn (mem_rtx, reg_rtx);
5209 if (offset != d_rounding)
5213 push_regs (&live_regs_mask, current_function_interrupt);
5215 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5217 rtx insn = get_last_insn ();
5218 rtx last = emit_insn (gen_GOTaddr2picreg ());
5220 /* Mark these insns as possibly dead. Sometimes, flow2 may
5221 delete all uses of the PIC register. In this case, let it
5222 delete the initialization too. */
5225 insn = NEXT_INSN (insn);
5227 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5231 while (insn != last);
5234 if (SHMEDIA_REGS_STACK_ADJUST ())
5236 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5237 function_symbol (TARGET_FPU_ANY
5238 ? "__GCC_push_shmedia_regs"
5239 : "__GCC_push_shmedia_regs_nofpu"));
5240 /* This must NOT go through the PLT, otherwise mach and macl
5241 may be clobbered. */
5242 emit_insn (gen_shmedia_save_restore_regs_compact
5243 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5246 if (target_flags != save_flags && ! current_function_interrupt)
5248 rtx insn = emit_insn (gen_toggle_sz ());
5250 /* If we're lucky, a mode switch in the function body will
5251 overwrite fpscr, turning this insn dead. Tell flow this
5252 insn is ok to delete. */
5253 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5258 target_flags = save_flags;
5260 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5261 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5263 if (frame_pointer_needed)
5264 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5266 if (TARGET_SHCOMPACT
5267 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5269 /* This must NOT go through the PLT, otherwise mach and macl
5270 may be clobbered. */
5271 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5272 function_symbol ("__GCC_shcompact_incoming_args"));
5273 emit_insn (gen_shcompact_incoming_args ());
5278 sh_expand_epilogue ()
5280 HARD_REG_SET live_regs_mask;
5284 int save_flags = target_flags;
5285 int frame_size, save_size;
5286 int fpscr_deferred = 0;
5288 d = calc_live_regs (&live_regs_mask);
5291 frame_size = rounded_frame_size (d);
5295 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5297 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5298 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5299 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5301 total_size = d + tregs_space;
5302 total_size += rounded_frame_size (total_size);
5303 save_size = total_size - frame_size;
5305 /* If adjusting the stack in a single step costs nothing extra, do so.
5306 I.e. either if a single addi is enough, or we need a movi anyway,
5307 and we don't exceed the maximum offset range (the test for the
5308 latter is conservative for simplicity). */
5310 && ! frame_pointer_needed
5311 && (CONST_OK_FOR_I10 (total_size)
5312 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5313 && total_size <= 2044)))
5314 d_rounding = frame_size;
5316 frame_size -= d_rounding;
5319 if (frame_pointer_needed)
5321 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5323 /* We must avoid moving the stack pointer adjustment past code
5324 which reads from the local frame, else an interrupt could
5325 occur after the SP adjustment and clobber data in the local
5327 emit_insn (gen_blockage ());
5328 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5330 else if (frame_size)
5332 /* We must avoid moving the stack pointer adjustment past code
5333 which reads from the local frame, else an interrupt could
5334 occur after the SP adjustment and clobber data in the local
5336 emit_insn (gen_blockage ());
5337 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5340 if (SHMEDIA_REGS_STACK_ADJUST ())
5342 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5343 function_symbol (TARGET_FPU_ANY
5344 ? "__GCC_pop_shmedia_regs"
5345 : "__GCC_pop_shmedia_regs_nofpu"));
5346 /* This must NOT go through the PLT, otherwise mach and macl
5347 may be clobbered. */
5348 emit_insn (gen_shmedia_save_restore_regs_compact
5349 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5352 /* Pop all the registers. */
5354 if (target_flags != save_flags && ! current_function_interrupt)
5355 emit_insn (gen_toggle_sz ());
5358 int offset = d_rounding;
5359 int offset_in_r0 = -1;
5362 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5363 int tmp_regno = R20_REG;
5365 /* We loop twice: first, we save 8-byte aligned registers in the
5366 higher addresses, that are known to be aligned. Then, we
5367 proceed to saving 32-bit registers that don't need 8-byte
5369 for (align = 0; align <= 1; align++)
5370 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5371 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5373 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5375 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5377 if (mode == SFmode && (i % 2) == 0
5378 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5379 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5385 /* If we're doing the aligned pass and this is not aligned,
5386 or we're doing the unaligned pass and this is aligned,
5388 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5392 reg_rtx = gen_rtx_REG (mode, reg);
5394 mem_rtx = gen_rtx_MEM (mode,
5395 gen_rtx_PLUS (Pmode,
5399 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5405 if (HAVE_POST_INCREMENT
5406 && (offset == offset_in_r0
5407 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5408 && mem_rtx == NULL_RTX)
5409 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5411 post_inc = gen_rtx_MEM (mode,
5412 gen_rtx_POST_INC (Pmode, r0));
5414 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5417 post_inc = NULL_RTX;
5426 if (mem_rtx != NULL_RTX)
5429 if (offset_in_r0 == -1)
5431 emit_move_insn (r0, GEN_INT (offset));
5432 offset_in_r0 = offset;
5434 else if (offset != offset_in_r0)
5439 GEN_INT (offset - offset_in_r0)));
5440 offset_in_r0 += offset - offset_in_r0;
5443 if (post_inc != NULL_RTX)
5449 (Pmode, r0, stack_pointer_rtx));
5455 offset_in_r0 += GET_MODE_SIZE (mode);
5458 mem_rtx = gen_rtx_MEM (mode, r0);
5460 mem_rtx = gen_rtx_MEM (mode,
5461 gen_rtx_PLUS (Pmode,
5465 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5466 && mem_rtx != post_inc)
5470 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5471 && mem_rtx != post_inc)
5473 insn = emit_move_insn (r0, mem_rtx);
5476 else if (TARGET_REGISTER_P (i))
5478 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5480 /* Give the scheduler a bit of freedom by using R20..R23
5481 in a round-robin fashion. Don't use R1 here because
5482 we want to use it for EH_RETURN_STACKADJ_RTX. */
5483 insn = emit_move_insn (tmp_reg, mem_rtx);
5485 if (++tmp_regno > R23_REG)
5486 tmp_regno = R20_REG;
5489 insn = emit_move_insn (reg_rtx, mem_rtx);
5491 offset += GET_MODE_SIZE (mode);
5494 if (offset != d + d_rounding)
5497 else /* ! TARGET_SH5 */
5500 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5502 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5504 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5506 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5507 && hard_regs_intersect_p (&live_regs_mask,
5508 ®_class_contents[DF_REGS]))
5510 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5512 if (j == FIRST_FP_REG && fpscr_deferred)
5517 if (target_flags != save_flags && ! current_function_interrupt)
5518 emit_insn (gen_toggle_sz ());
5519 target_flags = save_flags;
5521 output_stack_adjust (extra_push + current_function_pretend_args_size
5522 + save_size + d_rounding
5523 + current_function_args_info.stack_regs * 8,
5524 stack_pointer_rtx, 7, emit_insn);
5526 if (current_function_calls_eh_return)
5527 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5528 EH_RETURN_STACKADJ_RTX));
5530 /* Switch back to the normal stack if necessary. */
5532 emit_insn (gen_sp_switch_2 ());
5534 /* Tell flow the insn that pops PR isn't dead. */
5535 /* PR_REG will never be live in SHmedia mode, and we don't need to
5536 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5537 by the return pattern. */
5538 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5539 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5542 static int sh_need_epilogue_known = 0;
5547 if (! sh_need_epilogue_known)
5552 sh_expand_epilogue ();
5553 epilogue = get_insns ();
5555 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5557 return sh_need_epilogue_known > 0;
5560 /* Emit code to change the current function's return address to RA.
5561 TEMP is available as a scratch register, if needed. */
5564 sh_set_return_address (ra, tmp)
5567 HARD_REG_SET live_regs_mask;
5570 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5573 d = calc_live_regs (&live_regs_mask);
5575 /* If pr_reg isn't life, we can set it (or the register given in
5576 sh_media_register_for_return) directly. */
5577 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5583 int rr_regno = sh_media_register_for_return ();
5588 rr = gen_rtx_REG (DImode, rr_regno);
5591 rr = gen_rtx_REG (SImode, pr_reg);
5593 emit_insn (GEN_MOV (rr, ra));
5594 /* Tell flow the register for return isn't dead. */
5595 emit_insn (gen_rtx_USE (VOIDmode, rr));
5605 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5606 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5607 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5611 /* We loop twice: first, we save 8-byte aligned registers in the
5612 higher addresses, that are known to be aligned. Then, we
5613 proceed to saving 32-bit registers that don't need 8-byte
5615 for (align = 0; align <= 1; align++)
5616 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5617 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5619 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5621 if (mode == SFmode && (i % 2) == 0
5622 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5623 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5629 /* If we're doing the aligned pass and this is not aligned,
5630 or we're doing the unaligned pass and this is aligned,
5632 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5639 offset += GET_MODE_SIZE (mode);
5642 /* We can't find pr register. */
5646 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5647 + SHMEDIA_REGS_STACK_ADJUST ());
5650 pr_offset = rounded_frame_size (d) - d_rounding;
5652 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5653 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5655 tmp = gen_rtx_MEM (Pmode, tmp);
5656 emit_insn (GEN_MOV (tmp, ra));
5659 /* Clear variables at function end. */
5662 sh_output_function_epilogue (file, size)
5663 FILE *file ATTRIBUTE_UNUSED;
5664 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5666 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5667 sh_need_epilogue_known = 0;
5668 sp_switch = NULL_RTX;
5672 sh_builtin_saveregs ()
5674 /* First unnamed integer register. */
5675 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5676 /* Number of integer registers we need to save. */
5677 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5678 /* First unnamed SFmode float reg */
5679 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5680 /* Number of SFmode float regs to save. */
5681 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5684 HOST_WIDE_INT alias_set;
5690 int pushregs = n_intregs;
5692 while (pushregs < NPARM_REGS (SImode) - 1
5693 && (CALL_COOKIE_INT_REG_GET
5694 (current_function_args_info.call_cookie,
5695 NPARM_REGS (SImode) - pushregs)
5698 current_function_args_info.call_cookie
5699 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5704 if (pushregs == NPARM_REGS (SImode))
5705 current_function_args_info.call_cookie
5706 |= (CALL_COOKIE_INT_REG (0, 1)
5707 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5709 current_function_args_info.call_cookie
5710 |= CALL_COOKIE_STACKSEQ (pushregs);
5712 current_function_pretend_args_size += 8 * n_intregs;
5714 if (TARGET_SHCOMPACT)
5718 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5720 error ("__builtin_saveregs not supported by this subtarget");
5727 /* Allocate block of memory for the regs. */
5728 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5729 Or can assign_stack_local accept a 0 SIZE argument? */
5730 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5733 regbuf = gen_rtx_MEM (BLKmode,
5734 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5735 else if (n_floatregs & 1)
5739 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5740 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5741 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5742 regbuf = change_address (regbuf, BLKmode, addr);
5745 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5746 alias_set = get_varargs_alias_set ();
5747 set_mem_alias_set (regbuf, alias_set);
5750 This is optimized to only save the regs that are necessary. Explicitly
5751 named args need not be saved. */
5753 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5754 adjust_address (regbuf, BLKmode,
5755 n_floatregs * UNITS_PER_WORD),
5759 /* Return the address of the regbuf. */
5760 return XEXP (regbuf, 0);
5763 This is optimized to only save the regs that are necessary. Explicitly
5764 named args need not be saved.
5765 We explicitly build a pointer to the buffer because it halves the insn
5766 count when not optimizing (otherwise the pointer is built for each reg
5768 We emit the moves in reverse order so that we can use predecrement. */
5770 fpregs = gen_reg_rtx (Pmode);
5771 emit_move_insn (fpregs, XEXP (regbuf, 0));
5772 emit_insn (gen_addsi3 (fpregs, fpregs,
5773 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5777 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5779 emit_insn (gen_addsi3 (fpregs, fpregs,
5780 GEN_INT (-2 * UNITS_PER_WORD)));
5781 mem = gen_rtx_MEM (DFmode, fpregs);
5782 set_mem_alias_set (mem, alias_set);
5783 emit_move_insn (mem,
5784 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5786 regno = first_floatreg;
5789 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5790 mem = gen_rtx_MEM (SFmode, fpregs);
5791 set_mem_alias_set (mem, alias_set);
5792 emit_move_insn (mem,
5793 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5794 - (TARGET_LITTLE_ENDIAN != 0)));
5798 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5802 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5803 mem = gen_rtx_MEM (SFmode, fpregs);
5804 set_mem_alias_set (mem, alias_set);
5805 emit_move_insn (mem,
5806 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5809 /* Return the address of the regbuf. */
5810 return XEXP (regbuf, 0);
5813 /* Define the `__builtin_va_list' type for the ABI. */
5818 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5821 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5822 return ptr_type_node;
5824 record = make_node (RECORD_TYPE);
5826 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5828 f_next_o_limit = build_decl (FIELD_DECL,
5829 get_identifier ("__va_next_o_limit"),
5831 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5833 f_next_fp_limit = build_decl (FIELD_DECL,
5834 get_identifier ("__va_next_fp_limit"),
5836 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5839 DECL_FIELD_CONTEXT (f_next_o) = record;
5840 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5841 DECL_FIELD_CONTEXT (f_next_fp) = record;
5842 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5843 DECL_FIELD_CONTEXT (f_next_stack) = record;
5845 TYPE_FIELDS (record) = f_next_o;
5846 TREE_CHAIN (f_next_o) = f_next_o_limit;
5847 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5848 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5849 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5851 layout_type (record);
5856 /* Implement `va_start' for varargs and stdarg. */
5859 sh_va_start (valist, nextarg)
5863 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5864 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5870 expand_builtin_saveregs ();
5871 std_expand_builtin_va_start (valist, nextarg);
5875 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5877 std_expand_builtin_va_start (valist, nextarg);
5881 f_next_o = TYPE_FIELDS (va_list_type_node);
5882 f_next_o_limit = TREE_CHAIN (f_next_o);
5883 f_next_fp = TREE_CHAIN (f_next_o_limit);
5884 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5885 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5887 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5888 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5889 valist, f_next_o_limit);
5890 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5891 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5892 valist, f_next_fp_limit);
5893 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5894 valist, f_next_stack);
5896 /* Call __builtin_saveregs. */
5897 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5898 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5899 TREE_SIDE_EFFECTS (t) = 1;
5900 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5902 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5907 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5908 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5909 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5910 TREE_SIDE_EFFECTS (t) = 1;
5911 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5913 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5914 TREE_SIDE_EFFECTS (t) = 1;
5915 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5917 nint = current_function_args_info.arg_count[SH_ARG_INT];
5922 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5923 build_int_2 (UNITS_PER_WORD * nint, 0)));
5924 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5925 TREE_SIDE_EFFECTS (t) = 1;
5926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5928 u = make_tree (ptr_type_node, nextarg);
5929 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5930 TREE_SIDE_EFFECTS (t) = 1;
5931 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5934 /* Implement `va_arg'. */
5937 sh_va_arg (valist, type)
5940 HOST_WIDE_INT size, rsize;
5941 tree tmp, pptr_type_node;
5944 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5946 size = int_size_in_bytes (type);
5947 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5948 pptr_type_node = build_pointer_type (ptr_type_node);
5951 type = build_pointer_type (type);
5953 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5955 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5956 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5958 rtx lab_false, lab_over;
5960 f_next_o = TYPE_FIELDS (va_list_type_node);
5961 f_next_o_limit = TREE_CHAIN (f_next_o);
5962 f_next_fp = TREE_CHAIN (f_next_o_limit);
5963 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5964 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5966 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5967 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5968 valist, f_next_o_limit);
5969 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5971 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5972 valist, f_next_fp_limit);
5973 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5974 valist, f_next_stack);
5978 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5979 || (TREE_CODE (type) == COMPLEX_TYPE
5980 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5985 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5988 addr_rtx = gen_reg_rtx (Pmode);
5989 lab_false = gen_label_rtx ();
5990 lab_over = gen_label_rtx ();
5995 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5996 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5998 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6000 expand_expr (next_fp_limit, NULL_RTX,
6001 Pmode, EXPAND_NORMAL),
6002 GE, const1_rtx, Pmode, 1, lab_false);
6004 if (TYPE_ALIGN (type) > BITS_PER_WORD
6005 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6006 && (n_floatregs & 1)))
6008 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6009 build_int_2 (UNITS_PER_WORD, 0));
6010 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6011 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6012 TREE_SIDE_EFFECTS (tmp) = 1;
6013 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6016 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6017 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6019 emit_move_insn (addr_rtx, r);
6021 emit_jump_insn (gen_jump (lab_over));
6023 emit_label (lab_false);
6025 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6026 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6028 emit_move_insn (addr_rtx, r);
6032 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6033 build_int_2 (rsize, 0));
6035 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6037 expand_expr (next_o_limit, NULL_RTX,
6038 Pmode, EXPAND_NORMAL),
6039 GT, const1_rtx, Pmode, 1, lab_false);
6041 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6042 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6044 emit_move_insn (addr_rtx, r);
6046 emit_jump_insn (gen_jump (lab_over));
6048 emit_label (lab_false);
6050 if (size > 4 && ! TARGET_SH4)
6052 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6053 TREE_SIDE_EFFECTS (tmp) = 1;
6054 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6057 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6058 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6060 emit_move_insn (addr_rtx, r);
6063 emit_label (lab_over);
6065 tmp = make_tree (pptr_type_node, addr_rtx);
6066 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6069 /* ??? In va-sh.h, there had been code to make values larger than
6070 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6072 result = std_expand_builtin_va_arg (valist, type);
6075 #ifdef POINTERS_EXTEND_UNSIGNED
6076 if (GET_MODE (addr) != Pmode)
6077 addr = convert_memory_address (Pmode, result);
6079 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6080 set_mem_alias_set (result, get_varargs_alias_set ());
6082 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6083 argument to the varargs alias set. */
6087 /* Define the offset between two registers, one to be eliminated, and
6088 the other its replacement, at the start of a routine. */
6091 initial_elimination_offset (from, to)
6096 int regs_saved_rounding = 0;
6097 int total_saved_regs_space;
6098 int total_auto_space;
6099 int save_flags = target_flags;
6101 HARD_REG_SET live_regs_mask;
6103 shmedia_space_reserved_for_target_registers = false;
6104 regs_saved = calc_live_regs (&live_regs_mask);
6105 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6107 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6109 shmedia_space_reserved_for_target_registers = true;
6110 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6113 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6114 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6115 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6117 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6118 copy_flags = target_flags;
6119 target_flags = save_flags;
6121 total_saved_regs_space = regs_saved + regs_saved_rounding;
6123 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6124 return total_saved_regs_space + total_auto_space
6125 + current_function_args_info.byref_regs * 8;
6127 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6128 return total_saved_regs_space + total_auto_space
6129 + current_function_args_info.byref_regs * 8;
6131 /* Initial gap between fp and sp is 0. */
6132 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6135 if (from == RETURN_ADDRESS_POINTER_REGNUM
6136 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6140 int i, n = total_saved_regs_space;
6142 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6144 n += total_auto_space;
6146 /* If it wasn't saved, there's not much we can do. */
6147 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6150 target_flags = copy_flags;
6152 /* We loop twice: first, check 8-byte aligned registers,
6153 that are stored in the higher addresses, that are known
6154 to be aligned. Then, check 32-bit registers that don't
6155 need 8-byte alignment. */
6156 for (align = 1; align >= 0; align--)
6157 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6158 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6160 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6162 if (mode == SFmode && (i % 2) == 1
6163 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6164 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6170 /* If we're doing the aligned pass and this is not aligned,
6171 or we're doing the unaligned pass and this is aligned,
6173 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6177 n -= GET_MODE_SIZE (mode);
6181 target_flags = save_flags;
6189 return total_auto_space;
6195 /* Handle machine specific pragmas to be semi-compatible with Renesas
6199 sh_pr_interrupt (pfile)
6200 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6202 pragma_interrupt = 1;
6207 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6209 pragma_interrupt = pragma_trapa = 1;
6213 sh_pr_nosave_low_regs (pfile)
6214 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6216 pragma_nosave_low_regs = 1;
6219 /* Generate 'handle_interrupt' attribute for decls */
6222 sh_insert_attributes (node, attributes)
6226 if (! pragma_interrupt
6227 || TREE_CODE (node) != FUNCTION_DECL)
6230 /* We are only interested in fields. */
6231 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6234 /* Add a 'handle_interrupt' attribute. */
6235 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6240 /* Supported attributes:
6242 interrupt_handler -- specifies this function is an interrupt handler.
6244 sp_switch -- specifies an alternate stack for an interrupt handler
6247 trap_exit -- use a trapa to exit an interrupt function instead of
6248 an rte instruction. */
6250 const struct attribute_spec sh_attribute_table[] =
6252 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6253 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6254 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6255 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6256 { NULL, 0, 0, false, false, false, NULL }
6259 /* Handle an "interrupt_handler" attribute; arguments as in
6260 struct attribute_spec.handler. */
6262 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6265 tree args ATTRIBUTE_UNUSED;
6266 int flags ATTRIBUTE_UNUSED;
6269 if (TREE_CODE (*node) != FUNCTION_DECL)
6271 warning ("`%s' attribute only applies to functions",
6272 IDENTIFIER_POINTER (name));
6273 *no_add_attrs = true;
6275 else if (TARGET_SHCOMPACT)
6277 error ("attribute interrupt_handler is not compatible with -m5-compact");
6278 *no_add_attrs = true;
6284 /* Handle an "sp_switch" attribute; arguments as in
6285 struct attribute_spec.handler. */
6287 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6291 int flags ATTRIBUTE_UNUSED;
6294 if (TREE_CODE (*node) != FUNCTION_DECL)
6296 warning ("`%s' attribute only applies to functions",
6297 IDENTIFIER_POINTER (name));
6298 *no_add_attrs = true;
6300 else if (!pragma_interrupt)
6302 /* The sp_switch attribute only has meaning for interrupt functions. */
6303 warning ("`%s' attribute only applies to interrupt functions",
6304 IDENTIFIER_POINTER (name));
6305 *no_add_attrs = true;
6307 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6309 /* The argument must be a constant string. */
6310 warning ("`%s' attribute argument not a string constant",
6311 IDENTIFIER_POINTER (name));
6312 *no_add_attrs = true;
6316 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6317 TREE_STRING_POINTER (TREE_VALUE (args)));
6323 /* Handle an "trap_exit" attribute; arguments as in
6324 struct attribute_spec.handler. */
6326 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6330 int flags ATTRIBUTE_UNUSED;
6333 if (TREE_CODE (*node) != FUNCTION_DECL)
6335 warning ("`%s' attribute only applies to functions",
6336 IDENTIFIER_POINTER (name));
6337 *no_add_attrs = true;
6339 else if (!pragma_interrupt)
6341 /* The trap_exit attribute only has meaning for interrupt functions. */
6342 warning ("`%s' attribute only applies to interrupt functions",
6343 IDENTIFIER_POINTER (name));
6344 *no_add_attrs = true;
6346 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6348 /* The argument must be a constant integer. */
6349 warning ("`%s' attribute argument not an integer constant",
6350 IDENTIFIER_POINTER (name));
6351 *no_add_attrs = true;
6355 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6362 sh_cfun_interrupt_handler_p ()
6364 return (lookup_attribute ("interrupt_handler",
6365 DECL_ATTRIBUTES (current_function_decl))
6369 /* Predicates used by the templates. */
6371 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6372 Used only in general_movsrc_operand. */
6375 system_reg_operand (op, mode)
6377 enum machine_mode mode ATTRIBUTE_UNUSED;
6389 /* Returns 1 if OP can be source of a simple move operation.
6390 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6391 invalid as are subregs of system registers. */
6394 general_movsrc_operand (op, mode)
6396 enum machine_mode mode;
6398 if (GET_CODE (op) == MEM)
6400 rtx inside = XEXP (op, 0);
6401 if (GET_CODE (inside) == CONST)
6402 inside = XEXP (inside, 0);
6404 if (GET_CODE (inside) == LABEL_REF)
6407 if (GET_CODE (inside) == PLUS
6408 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6409 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6412 /* Only post inc allowed. */
6413 if (GET_CODE (inside) == PRE_DEC)
6417 if ((mode == QImode || mode == HImode)
6418 && (GET_CODE (op) == SUBREG
6419 && GET_CODE (XEXP (op, 0)) == REG
6420 && system_reg_operand (XEXP (op, 0), mode)))
6423 return general_operand (op, mode);
6426 /* Returns 1 if OP can be a destination of a move.
6427 Same as general_operand, but no preinc allowed. */
6430 general_movdst_operand (op, mode)
6432 enum machine_mode mode;
6434 /* Only pre dec allowed. */
6435 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6438 return general_operand (op, mode);
6441 /* Returns 1 if OP is a normal arithmetic register. */
6444 arith_reg_operand (op, mode)
6446 enum machine_mode mode;
6448 if (register_operand (op, mode))
6452 if (GET_CODE (op) == REG)
6454 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6455 regno = REGNO (SUBREG_REG (op));
6459 return (regno != T_REG && regno != PR_REG
6460 && ! TARGET_REGISTER_P (regno)
6461 && (regno != FPUL_REG || TARGET_SH4)
6462 && regno != MACH_REG && regno != MACL_REG);
6467 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6468 because this would lead to missing sign extensions when truncating from
6469 DImode to SImode. */
6471 arith_reg_dest (op, mode)
6473 enum machine_mode mode;
6475 if (mode == DImode && GET_CODE (op) == SUBREG
6476 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6478 return arith_reg_operand (op, mode);
6482 int_gpr_dest (op, mode)
6484 enum machine_mode mode ATTRIBUTE_UNUSED;
6486 enum machine_mode op_mode = GET_MODE (op);
6488 if (GET_MODE_CLASS (op_mode) != MODE_INT
6489 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6491 if (! reload_completed)
6493 return true_regnum (op) <= LAST_GENERAL_REG;
6497 fp_arith_reg_operand (op, mode)
6499 enum machine_mode mode;
6501 if (register_operand (op, mode))
6505 if (GET_CODE (op) == REG)
6507 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6508 regno = REGNO (SUBREG_REG (op));
6512 return (regno >= FIRST_PSEUDO_REGISTER
6513 || FP_REGISTER_P (regno));
6518 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6521 arith_operand (op, mode)
6523 enum machine_mode mode;
6525 if (arith_reg_operand (op, mode))
6530 /* FIXME: We should be checking whether the CONST_INT fits in a
6531 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
6532 attempting to transform a sequence of two 64-bit sets of the
6533 same register from literal constants into a set and an add,
6534 when the difference is too wide for an add. */
6535 if (GET_CODE (op) == CONST_INT
6536 || EXTRA_CONSTRAINT_C16 (op))
6541 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
6547 /* Returns 1 if OP is a valid source operand for a compare insn. */
6550 arith_reg_or_0_operand (op, mode)
6552 enum machine_mode mode;
6554 if (arith_reg_operand (op, mode))
6557 if (EXTRA_CONSTRAINT_Z (op))
6563 /* Return 1 if OP is a valid source operand for an SHmedia operation
6564 that takes either a register or a 6-bit immediate. */
6567 shmedia_6bit_operand (op, mode)
6569 enum machine_mode mode;
6571 return (arith_reg_operand (op, mode)
6572 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
6575 /* Returns 1 if OP is a valid source operand for a logical operation. */
6578 logical_operand (op, mode)
6580 enum machine_mode mode;
6582 if (arith_reg_operand (op, mode))
6587 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
6592 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
6599 and_operand (op, mode)
6601 enum machine_mode mode;
6603 if (logical_operand (op, mode))
6606 /* Check mshflo.l / mshflhi.l opportunities. */
6609 && GET_CODE (op) == CONST_INT
6610 && CONST_OK_FOR_J16 (INTVAL (op)))
6616 /* Nonzero if OP is a floating point value with value 0.0. */
6619 fp_zero_operand (op)
6624 if (GET_MODE (op) != SFmode)
6627 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6628 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6631 /* Nonzero if OP is a floating point value with value 1.0. */
6639 if (GET_MODE (op) != SFmode)
6642 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6643 return REAL_VALUES_EQUAL (r, dconst1);
6646 /* For -m4 and -m4-single-only, mode switching is used. If we are
6647 compiling without -mfmovd, movsf_ie isn't taken into account for
6648 mode switching. We could check in machine_dependent_reorg for
6649 cases where we know we are in single precision mode, but there is
6650 interface to find that out during reload, so we must avoid
6651 choosing an fldi alternative during reload and thus failing to
6652 allocate a scratch register for the constant loading. */
6656 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6660 tertiary_reload_operand (op, mode)
6662 enum machine_mode mode ATTRIBUTE_UNUSED;
6664 enum rtx_code code = GET_CODE (op);
6665 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6669 fpscr_operand (op, mode)
6671 enum machine_mode mode ATTRIBUTE_UNUSED;
6673 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6674 && GET_MODE (op) == PSImode);
6678 fpul_operand (op, mode)
6680 enum machine_mode mode;
6683 return fp_arith_reg_operand (op, mode);
6685 return (GET_CODE (op) == REG
6686 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6687 && GET_MODE (op) == mode);
6691 symbol_ref_operand (op, mode)
6693 enum machine_mode mode ATTRIBUTE_UNUSED;
6695 return (GET_CODE (op) == SYMBOL_REF);
6698 /* Return the TLS type for TLS symbols, 0 for otherwise. */
6700 tls_symbolic_operand (op, mode)
6702 enum machine_mode mode ATTRIBUTE_UNUSED;
6704 if (GET_CODE (op) != SYMBOL_REF)
6706 return SYMBOL_REF_TLS_MODEL (op);
6710 commutative_float_operator (op, mode)
6712 enum machine_mode mode;
6714 if (GET_MODE (op) != mode)
6716 switch (GET_CODE (op))
6728 noncommutative_float_operator (op, mode)
6730 enum machine_mode mode;
6732 if (GET_MODE (op) != mode)
6734 switch (GET_CODE (op))
6746 unary_float_operator (op, mode)
6748 enum machine_mode mode;
6750 if (GET_MODE (op) != mode)
6752 switch (GET_CODE (op))
6765 binary_float_operator (op, mode)
6767 enum machine_mode mode;
6769 if (GET_MODE (op) != mode)
6771 switch (GET_CODE (op))
6785 binary_logical_operator (op, mode)
6787 enum machine_mode mode;
6789 if (GET_MODE (op) != mode)
6791 switch (GET_CODE (op))
6804 equality_comparison_operator (op, mode)
6806 enum machine_mode mode;
6808 return ((mode == VOIDmode || GET_MODE (op) == mode)
6809 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6812 int greater_comparison_operator (op, mode)
6814 enum machine_mode mode;
6816 if (mode != VOIDmode && GET_MODE (op) == mode)
6818 switch (GET_CODE (op))
6830 int less_comparison_operator (op, mode)
6832 enum machine_mode mode;
6834 if (mode != VOIDmode && GET_MODE (op) == mode)
6836 switch (GET_CODE (op))
6848 /* Accept pseudos and branch target registers. */
6850 target_reg_operand (op, mode)
6852 enum machine_mode mode;
6855 || GET_MODE (op) != DImode)
6858 if (GET_CODE (op) == SUBREG)
6861 if (GET_CODE (op) != REG)
6864 /* We must protect ourselves from matching pseudos that are virtual
6865 register, because they will eventually be replaced with hardware
6866 registers that aren't branch-target registers. */
6867 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6868 || TARGET_REGISTER_P (REGNO (op)))
6874 /* Same as target_reg_operand, except that label_refs and symbol_refs
6875 are accepted before reload. */
6877 target_operand (op, mode)
6879 enum machine_mode mode;
6884 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6885 && EXTRA_CONSTRAINT_Csy (op))
6886 return ! reload_completed;
6888 return target_reg_operand (op, mode);
6892 mextr_bit_offset (op, mode)
6894 enum machine_mode mode ATTRIBUTE_UNUSED;
6898 if (GET_CODE (op) != CONST_INT)
6901 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6905 extend_reg_operand (op, mode)
6907 enum machine_mode mode;
6909 return (GET_CODE (op) == TRUNCATE
6911 : arith_reg_operand) (op, mode);
6915 trunc_hi_operand (op, mode)
6917 enum machine_mode mode;
6919 enum machine_mode op_mode = GET_MODE (op);
6921 if (op_mode != SImode && op_mode != DImode
6922 && op_mode != V4HImode && op_mode != V2SImode)
6924 return extend_reg_operand (op, mode);
6928 extend_reg_or_0_operand (op, mode)
6930 enum machine_mode mode;
6932 return (GET_CODE (op) == TRUNCATE
6934 : arith_reg_or_0_operand) (op, mode);
6938 general_extend_operand (op, mode)
6940 enum machine_mode mode;
6942 return (GET_CODE (op) == TRUNCATE
6944 : nonimmediate_operand) (op, mode);
6948 inqhi_operand (op, mode)
6950 enum machine_mode mode;
6952 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6955 /* Can't use true_regnum here because copy_cost wants to know about
6956 SECONDARY_INPUT_RELOAD_CLASS. */
6957 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6961 sh_rep_vec (v, mode)
6963 enum machine_mode mode;
6968 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6969 || (GET_MODE (v) != mode && mode != VOIDmode))
6971 i = XVECLEN (v, 0) - 2;
6972 x = XVECEXP (v, 0, i + 1);
6973 if (GET_MODE_UNIT_SIZE (mode) == 1)
6975 y = XVECEXP (v, 0, i);
6976 for (i -= 2 ; i >= 0; i -= 2)
6977 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6978 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6983 if (XVECEXP (v, 0, i) != x)
6988 /* Determine if V is a constant vector matching MODE with only one element
6989 that is not a sign extension. Two byte-sized elements count as one. */
6991 sh_1el_vec (v, mode)
6993 enum machine_mode mode;
6996 int i, last, least, sign_ix;
6999 if (GET_CODE (v) != CONST_VECTOR
7000 || (GET_MODE (v) != mode && mode != VOIDmode))
7002 /* Determine numbers of last and of least significant elements. */
7003 last = XVECLEN (v, 0) - 1;
7004 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7005 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7008 if (GET_MODE_UNIT_SIZE (mode) == 1)
7009 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7010 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7012 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7013 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7014 ? constm1_rtx : const0_rtx);
7015 i = XVECLEN (v, 0) - 1;
7017 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7024 sh_const_vec (v, mode)
7026 enum machine_mode mode;
7030 if (GET_CODE (v) != CONST_VECTOR
7031 || (GET_MODE (v) != mode && mode != VOIDmode))
7033 i = XVECLEN (v, 0) - 1;
7035 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7040 /* Return the destination address of a branch. */
7043 branch_dest (branch)
7046 rtx dest = SET_SRC (PATTERN (branch));
7049 if (GET_CODE (dest) == IF_THEN_ELSE)
7050 dest = XEXP (dest, 1);
7051 dest = XEXP (dest, 0);
7052 dest_uid = INSN_UID (dest);
7053 return INSN_ADDRESSES (dest_uid);
7056 /* Return nonzero if REG is not used after INSN.
7057 We assume REG is a reload reg, and therefore does
7058 not live past labels. It may live past calls or jumps though. */
7060 reg_unused_after (reg, insn)
7067 /* If the reg is set by this instruction, then it is safe for our
7068 case. Disregard the case where this is a store to memory, since
7069 we are checking a register used in the store address. */
7070 set = single_set (insn);
7071 if (set && GET_CODE (SET_DEST (set)) != MEM
7072 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7075 while ((insn = NEXT_INSN (insn)))
7077 code = GET_CODE (insn);
7080 /* If this is a label that existed before reload, then the register
7081 if dead here. However, if this is a label added by reorg, then
7082 the register may still be live here. We can't tell the difference,
7083 so we just ignore labels completely. */
7084 if (code == CODE_LABEL)
7089 if (code == JUMP_INSN)
7092 /* If this is a sequence, we must handle them all at once.
7093 We could have for instance a call that sets the target register,
7094 and an insn in a delay slot that uses the register. In this case,
7095 we must return 0. */
7096 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7101 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7103 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7104 rtx set = single_set (this_insn);
7106 if (GET_CODE (this_insn) == CALL_INSN)
7108 else if (GET_CODE (this_insn) == JUMP_INSN)
7110 if (INSN_ANNULLED_BRANCH_P (this_insn))
7115 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7117 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7119 if (GET_CODE (SET_DEST (set)) != MEM)
7125 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7130 else if (code == JUMP_INSN)
7133 else if (GET_RTX_CLASS (code) == 'i')
7135 rtx set = single_set (insn);
7137 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7139 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7140 return GET_CODE (SET_DEST (set)) != MEM;
7141 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7145 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7153 static GTY(()) rtx fpscr_rtx;
7159 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7160 REG_USERVAR_P (fpscr_rtx) = 1;
7161 mark_user_reg (fpscr_rtx);
7163 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7164 mark_user_reg (fpscr_rtx);
7183 expand_sf_unop (fun, operands)
7184 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7187 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7191 expand_sf_binop (fun, operands)
7192 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7195 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7200 expand_df_unop (fun, operands)
7201 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7204 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7208 expand_df_binop (fun, operands)
7209 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7212 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7216 /* ??? gcc does flow analysis strictly after common subexpression
7217 elimination. As a result, common subexpression elimination fails
7218 when there are some intervening statements setting the same register.
7219 If we did nothing about this, this would hurt the precision switching
7220 for SH4 badly. There is some cse after reload, but it is unable to
7221 undo the extra register pressure from the unused instructions, and
7222 it cannot remove auto-increment loads.
7224 A C code example that shows this flow/cse weakness for (at least) SH
7225 and sparc (as of gcc ss-970706) is this:
7239 So we add another pass before common subexpression elimination, to
7240 remove assignments that are dead due to a following assignment in the
7241 same basic block. */
7244 mark_use (x, reg_set_block)
7245 rtx x, *reg_set_block;
7251 code = GET_CODE (x);
7256 int regno = REGNO (x);
7257 int nregs = (regno < FIRST_PSEUDO_REGISTER
7258 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7262 reg_set_block[regno + nregs - 1] = 0;
7269 rtx dest = SET_DEST (x);
7271 if (GET_CODE (dest) == SUBREG)
7272 dest = SUBREG_REG (dest);
7273 if (GET_CODE (dest) != REG)
7274 mark_use (dest, reg_set_block);
7275 mark_use (SET_SRC (x), reg_set_block);
7282 const char *fmt = GET_RTX_FORMAT (code);
7284 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7287 mark_use (XEXP (x, i), reg_set_block);
7288 else if (fmt[i] == 'E')
7289 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7290 mark_use (XVECEXP (x, i, j), reg_set_block);
7297 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7299 /* This function returns a register to use to load the address to load
7300 the fpscr from. Currently it always returns r1 or r7, but when we are
7301 able to use pseudo registers after combine, or have a better mechanism
7302 for choosing a register, it should be done here. */
7303 /* REGS_LIVE is the liveness information for the point for which we
7304 need this allocation. In some bare-bones exit blocks, r1 is live at the
7305 start. We can even have all of r0..r3 being live:
7306 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7307 INSN before which new insns are placed with will clobber the register
7308 we return. If a basic block consists only of setting the return value
7309 register to a pseudo and using that register, the return value is not
7310 live before or after this block, yet we we'll insert our insns right in
7314 get_free_reg (regs_live)
7315 HARD_REG_SET regs_live;
7317 if (! TEST_HARD_REG_BIT (regs_live, 1))
7318 return gen_rtx_REG (Pmode, 1);
7320 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7321 there shouldn't be anything but a jump before the function end. */
7322 if (! TEST_HARD_REG_BIT (regs_live, 7))
7323 return gen_rtx_REG (Pmode, 7);
7328 /* This function will set the fpscr from memory.
7329 MODE is the mode we are setting it to. */
7331 fpscr_set_from_mem (mode, regs_live)
7333 HARD_REG_SET regs_live;
7335 enum attr_fp_mode fp_mode = mode;
7336 rtx addr_reg = get_free_reg (regs_live);
7338 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7339 emit_insn (gen_fpu_switch1 (addr_reg));
7341 emit_insn (gen_fpu_switch0 (addr_reg));
7344 /* Is the given character a logical line separator for the assembler? */
7345 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7346 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7350 sh_insn_length_adjustment (insn)
7353 /* Instructions with unfilled delay slots take up an extra two bytes for
7354 the nop in the delay slot. */
7355 if (((GET_CODE (insn) == INSN
7356 && GET_CODE (PATTERN (insn)) != USE
7357 && GET_CODE (PATTERN (insn)) != CLOBBER)
7358 || GET_CODE (insn) == CALL_INSN
7359 || (GET_CODE (insn) == JUMP_INSN
7360 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7361 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7362 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7363 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7366 /* SH2e has a bug that prevents the use of annulled branches, so if
7367 the delay slot is not filled, we'll have to put a NOP in it. */
7368 if (sh_cpu == CPU_SH2E
7369 && GET_CODE (insn) == JUMP_INSN
7370 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7371 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7372 && get_attr_type (insn) == TYPE_CBRANCH
7373 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7376 /* sh-dsp parallel processing insn take four bytes instead of two. */
7378 if (GET_CODE (insn) == INSN)
7381 rtx body = PATTERN (insn);
7382 const char *template;
7384 int maybe_label = 1;
7386 if (GET_CODE (body) == ASM_INPUT)
7387 template = XSTR (body, 0);
7388 else if (asm_noperands (body) >= 0)
7390 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7399 while (c == ' ' || c == '\t');
7400 /* all sh-dsp parallel-processing insns start with p.
7401 The only non-ppi sh insn starting with p is pref.
7402 The only ppi starting with pr is prnd. */
7403 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7405 /* The repeat pseudo-insn expands two three insns, a total of
7406 six bytes in size. */
7407 else if ((c == 'r' || c == 'R')
7408 && ! strncasecmp ("epeat", template, 5))
7410 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7412 /* If this is a label, it is obviously not a ppi insn. */
7413 if (c == ':' && maybe_label)
7418 else if (c == '\'' || c == '"')
7423 maybe_label = c != ':';
7431 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7432 isn't protected by a PIC unspec. */
7434 nonpic_symbol_mentioned_p (x)
7437 register const char *fmt;
7440 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7441 || GET_CODE (x) == PC)
7444 /* We don't want to look into the possible MEM location of a
7445 CONST_DOUBLE, since we're not going to use it, in general. */
7446 if (GET_CODE (x) == CONST_DOUBLE)
7449 if (GET_CODE (x) == UNSPEC
7450 && (XINT (x, 1) == UNSPEC_PIC
7451 || XINT (x, 1) == UNSPEC_GOT
7452 || XINT (x, 1) == UNSPEC_GOTOFF
7453 || XINT (x, 1) == UNSPEC_GOTPLT
7454 || XINT (x, 1) == UNSPEC_GOTTPOFF
7455 || XINT (x, 1) == UNSPEC_DTPOFF
7456 || XINT (x, 1) == UNSPEC_PLT))
7459 fmt = GET_RTX_FORMAT (GET_CODE (x));
7460 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7466 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7467 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7470 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7477 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7478 @GOTOFF in `reg'. */
7480 legitimize_pic_address (orig, mode, reg)
7482 enum machine_mode mode ATTRIBUTE_UNUSED;
7485 if (tls_symbolic_operand (orig, Pmode))
7488 if (GET_CODE (orig) == LABEL_REF
7489 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7492 reg = gen_reg_rtx (Pmode);
7494 emit_insn (gen_symGOTOFF2reg (reg, orig));
7497 else if (GET_CODE (orig) == SYMBOL_REF)
7500 reg = gen_reg_rtx (Pmode);
7502 emit_insn (gen_symGOT2reg (reg, orig));
7508 /* Mark the use of a constant in the literal table. If the constant
7509 has multiple labels, make it unique. */
7511 mark_constant_pool_use (x)
7514 rtx insn, lab, pattern;
7519 switch (GET_CODE (x))
7529 /* Get the first label in the list of labels for the same constant
7530 and delete another labels in the list. */
7532 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7534 if (GET_CODE (insn) != CODE_LABEL
7535 || LABEL_REFS (insn) != NEXT_INSN (insn))
7540 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7541 INSN_DELETED_P (insn) = 1;
7543 /* Mark constants in a window. */
7544 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7546 if (GET_CODE (insn) != INSN)
7549 pattern = PATTERN (insn);
7550 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7553 switch (XINT (pattern, 1))
7555 case UNSPECV_CONST2:
7556 case UNSPECV_CONST4:
7557 case UNSPECV_CONST8:
7558 XVECEXP (pattern, 0, 1) = const1_rtx;
7560 case UNSPECV_WINDOW_END:
7561 if (XVECEXP (pattern, 0, 0) == x)
7564 case UNSPECV_CONST_END:
7574 /* Return true if it's possible to redirect BRANCH1 to the destination
7575 of an unconditional jump BRANCH2. We only want to do this if the
7576 resulting branch will have a short displacement. */
7578 sh_can_redirect_branch (branch1, branch2)
7582 if (flag_expensive_optimizations && simplejump_p (branch2))
7584 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7588 for (distance = 0, insn = NEXT_INSN (branch1);
7589 insn && distance < 256;
7590 insn = PREV_INSN (insn))
7595 distance += get_attr_length (insn);
7597 for (distance = 0, insn = NEXT_INSN (branch1);
7598 insn && distance < 256;
7599 insn = NEXT_INSN (insn))
7604 distance += get_attr_length (insn);
7610 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7612 sh_hard_regno_rename_ok (old_reg, new_reg)
7613 unsigned int old_reg ATTRIBUTE_UNUSED;
7614 unsigned int new_reg;
7617 /* Interrupt functions can only use registers that have already been
7618 saved by the prologue, even if they would normally be
7621 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7627 /* Function to update the integer COST
7628 based on the relationship between INSN that is dependent on
7629 DEP_INSN through the dependence LINK. The default is to make no
7630 adjustment to COST. This can be used for example to specify to
7631 the scheduler that an output- or anti-dependence does not incur
7632 the same cost as a data-dependence. The return value should be
7633 the new value for COST. */
7635 sh_adjust_cost (insn, link, dep_insn, cost)
7637 rtx link ATTRIBUTE_UNUSED;
7645 /* On SHmedia, if the dependence is an anti-dependence or
7646 output-dependence, there is no cost. */
7647 if (REG_NOTE_KIND (link) != 0)
7650 if (get_attr_is_mac_media (insn)
7651 && get_attr_is_mac_media (dep_insn))
7654 else if (REG_NOTE_KIND (link) == 0)
7656 enum attr_type dep_type, type;
7658 if (recog_memoized (insn) < 0
7659 || recog_memoized (dep_insn) < 0)
7662 dep_type = get_attr_type (dep_insn);
7663 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7665 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7666 && (type = get_attr_type (insn)) != TYPE_CALL
7667 && type != TYPE_SFUNC)
7670 /* The only input for a call that is timing-critical is the
7671 function's address. */
7672 if (GET_CODE(insn) == CALL_INSN)
7674 rtx call = PATTERN (insn);
7676 if (GET_CODE (call) == PARALLEL)
7677 call = XVECEXP (call, 0 ,0);
7678 if (GET_CODE (call) == SET)
7679 call = SET_SRC (call);
7680 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7681 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7684 /* Likewise, the most timing critical input for an sfuncs call
7685 is the function address. However, sfuncs typically start
7686 using their arguments pretty quickly.
7687 Assume a four cycle delay before they are needed. */
7688 /* All sfunc calls are parallels with at least four components.
7689 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7690 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7691 && XVECLEN (PATTERN (insn), 0) >= 4
7692 && (reg = sfunc_uses_reg (insn)))
7694 if (! reg_set_p (reg, dep_insn))
7697 /* When the preceding instruction loads the shift amount of
7698 the following SHAD/SHLD, the latency of the load is increased
7701 && get_attr_type (insn) == TYPE_DYN_SHIFT
7702 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7703 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7704 XEXP (SET_SRC (single_set(insn)),
7707 /* When an LS group instruction with a latency of less than
7708 3 cycles is followed by a double-precision floating-point
7709 instruction, FIPR, or FTRV, the latency of the first
7710 instruction is increased to 3 cycles. */
7712 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7713 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7715 /* The lsw register of a double-precision computation is ready one
7717 else if (reload_completed
7718 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7719 && (use_pat = single_set (insn))
7720 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7724 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7725 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7728 /* An anti-dependence penalty of two applies if the first insn is a double
7729 precision fadd / fsub / fmul. */
7730 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7731 && recog_memoized (dep_insn) >= 0
7732 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7733 /* A lot of alleged anti-flow dependences are fake,
7734 so check this one is real. */
7735 && flow_dependent_p (dep_insn, insn))
7742 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7743 if DEP_INSN is anti-flow dependent on INSN. */
7745 flow_dependent_p (insn, dep_insn)
7748 rtx tmp = PATTERN (insn);
7750 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7751 return tmp == NULL_RTX;
7754 /* A helper function for flow_dependent_p called through note_stores. */
7756 flow_dependent_p_1 (x, pat, data)
7758 rtx pat ATTRIBUTE_UNUSED;
7761 rtx * pinsn = (rtx *) data;
7763 if (*pinsn && reg_referenced_p (x, *pinsn))
7767 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7768 'special function' patterns (type sfunc) that clobber pr, but that
7769 do not look like function calls to leaf_function_p. Hence we must
7770 do this extra check. */
7774 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7777 /* This Function returns nonzero if the DFA based scheduler interface
7778 is to be used. At present this is supported for the SH4 only. */
7780 sh_use_dfa_interface()
7782 if (TARGET_HARD_SH4)
7788 /* This function returns "2" to indicate dual issue for the SH4
7789 processor. To be used by the DFA pipeline description. */
7793 if (TARGET_SUPERSCALAR)
7799 /* SHmedia requires registers for branches, so we can't generate new
7800 branches past reload. */
7802 sh_cannot_modify_jumps_p ()
7804 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7808 sh_target_reg_class (void)
7810 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
7814 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
7816 return (shmedia_space_reserved_for_target_registers
7817 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
7821 sh_ms_bitfield_layout_p (record_type)
7822 tree record_type ATTRIBUTE_UNUSED;
7828 On the SH1..SH4, the trampoline looks like
7829 2 0002 D202 mov.l l2,r2
7830 1 0000 D301 mov.l l1,r3
7833 5 0008 00000000 l1: .long area
7834 6 000c 00000000 l2: .long function
7836 SH5 (compact) uses r1 instead of r3 for the static chain. */
7839 /* Emit RTL insns to initialize the variable parts of a trampoline.
7840 FNADDR is an RTX for the address of the function's pure code.
7841 CXT is an RTX for the static chain value for the function. */
7844 sh_initialize_trampoline (tramp, fnaddr, cxt)
7845 rtx tramp, fnaddr, cxt;
7847 if (TARGET_SHMEDIA64)
7852 rtx movi1 = GEN_INT (0xcc000010);
7853 rtx shori1 = GEN_INT (0xc8000010);
7856 /* The following trampoline works within a +- 128 KB range for cxt:
7857 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7858 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7859 gettr tr1,r1; blink tr0,r63 */
7860 /* Address rounding makes it hard to compute the exact bounds of the
7861 offset for this trampoline, but we have a rather generous offset
7862 range, so frame_offset should do fine as an upper bound. */
7863 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7865 /* ??? could optimize this trampoline initialization
7866 by writing DImode words with two insns each. */
7867 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7868 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7869 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7870 insn = gen_rtx_AND (DImode, insn, mask);
7871 /* Or in ptb/u .,tr1 pattern */
7872 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7873 insn = force_operand (insn, NULL_RTX);
7874 insn = gen_lowpart (SImode, insn);
7875 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7876 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7877 insn = gen_rtx_AND (DImode, insn, mask);
7878 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7879 insn = gen_lowpart (SImode, insn);
7880 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7881 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7882 insn = gen_rtx_AND (DImode, insn, mask);
7883 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7884 insn = gen_lowpart (SImode, insn);
7885 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7886 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7887 insn = gen_rtx_AND (DImode, insn, mask);
7888 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7889 insn = gen_lowpart (SImode, insn);
7890 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7892 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7893 insn = gen_rtx_AND (DImode, insn, mask);
7894 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7895 insn = gen_lowpart (SImode, insn);
7896 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7898 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7899 GEN_INT (0x6bf10600));
7900 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7901 GEN_INT (0x4415fc10));
7902 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7903 GEN_INT (0x4401fff0));
7904 emit_insn (gen_ic_invalidate_line (tramp));
7907 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7908 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7910 tramp_templ = gen_datalabel_ref (tramp_templ);
7911 dst = gen_rtx_MEM (BLKmode, tramp);
7912 src = gen_rtx_MEM (BLKmode, tramp_templ);
7913 set_mem_align (dst, 256);
7914 set_mem_align (src, 64);
7915 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7917 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7919 emit_move_insn (gen_rtx_MEM (Pmode,
7920 plus_constant (tramp,
7922 + GET_MODE_SIZE (Pmode))),
7924 emit_insn (gen_ic_invalidate_line (tramp));
7927 else if (TARGET_SHMEDIA)
7929 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7930 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7931 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7932 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7933 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7934 rotated 10 right, and higher 16 bit of every 32 selected. */
7936 = force_reg (V2HImode, (simplify_gen_subreg
7937 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7938 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7939 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7941 tramp = force_reg (Pmode, tramp);
7942 fnaddr = force_reg (SImode, fnaddr);
7943 cxt = force_reg (SImode, cxt);
7944 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7945 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7947 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7948 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7949 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7950 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7951 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7952 gen_rtx_SUBREG (V2HImode, cxt, 0),
7954 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7955 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7956 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7957 if (TARGET_LITTLE_ENDIAN)
7959 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7960 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7964 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7965 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7967 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7968 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7969 emit_insn (gen_ic_invalidate_line (tramp));
7972 else if (TARGET_SHCOMPACT)
7974 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7977 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7978 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7980 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7981 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7983 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7985 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7989 if (TARGET_USERMODE)
7990 emit_library_call (function_symbol ("__ic_invalidate"),
7991 0, VOIDmode, 1, tramp, SImode);
7993 emit_insn (gen_ic_invalidate_line (tramp));
7997 /* FIXME: This is overly conservative. A SHcompact function that
7998 receives arguments ``by reference'' will have them stored in its
7999 own stack frame, so it must not pass pointers or references to
8000 these arguments to other functions by means of sibling calls. */
8002 sh_function_ok_for_sibcall (decl, exp)
8004 tree exp ATTRIBUTE_UNUSED;
8007 && (! TARGET_SHCOMPACT
8008 || current_function_args_info.stack_regs == 0)
8009 && ! sh_cfun_interrupt_handler_p ());
8012 /* Machine specific built-in functions. */
8014 struct builtin_description
8016 const enum insn_code icode;
8017 const char *const name;
8021 /* describe number and signedness of arguments; arg[0] == result
8022 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
8023 static const char signature_args[][4] =
8025 #define SH_BLTIN_V2SI2 0
8027 #define SH_BLTIN_V4HI2 1
8029 #define SH_BLTIN_V2SI3 2
8031 #define SH_BLTIN_V4HI3 3
8033 #define SH_BLTIN_V8QI3 4
8035 #define SH_BLTIN_MAC_HISI 5
8037 #define SH_BLTIN_SH_HI 6
8039 #define SH_BLTIN_SH_SI 7
8041 #define SH_BLTIN_V4HI2V2SI 8
8043 #define SH_BLTIN_V4HI2V8QI 9
8045 #define SH_BLTIN_SISF 10
8047 #define SH_BLTIN_LDUA_L 11
8049 #define SH_BLTIN_LDUA_Q 12
8051 #define SH_BLTIN_STUA_L 13
8053 #define SH_BLTIN_STUA_Q 14
8055 #define SH_BLTIN_UDI 15
8057 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
8058 #define SH_BLTIN_2 16
8059 #define SH_BLTIN_SU 16
8061 #define SH_BLTIN_3 17
8062 #define SH_BLTIN_SUS 17
8064 #define SH_BLTIN_PSSV 18
8066 #define SH_BLTIN_XXUU 19
8067 #define SH_BLTIN_UUUU 19
8069 #define SH_BLTIN_PV 20
8072 /* mcmv: operands considered unsigned. */
8073 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
8074 /* mperm: control value considered unsigned int. */
8075 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
8076 /* mshards_q: returns signed short. */
8077 /* nsb: takes long long arg, returns unsigned char. */
8078 static const struct builtin_description bdesc[] =
8080 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
8081 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
8082 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
8083 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
8084 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
8085 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
8086 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
8088 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8089 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8091 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
8092 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
8093 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
8094 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
8095 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
8096 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
8097 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
8098 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
8099 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
8100 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
8101 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
8102 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
8103 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
8104 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
8105 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
8106 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
8107 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
8108 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
8109 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
8110 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
8111 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
8112 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
8113 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
8114 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
8115 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
8116 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
8117 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
8118 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
8119 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
8120 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
8121 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
8122 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
8123 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
8124 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
8125 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
8126 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
8127 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
8128 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
8129 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
8130 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
8131 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
8132 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
8133 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
8134 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
8135 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
8136 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
8137 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
8138 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
8139 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
8140 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
8141 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
8142 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
8143 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
8144 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
8146 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8147 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8148 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8149 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8150 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8151 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8152 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8153 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8154 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8155 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8156 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8157 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8158 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8159 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8160 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8161 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8163 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
8164 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
8166 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8167 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
8172 sh_media_init_builtins ()
8174 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8175 const struct builtin_description *d;
8177 memset (shared, 0, sizeof shared);
8178 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8180 tree type, arg_type;
8181 int signature = d->signature;
8184 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8185 type = shared[signature];
8188 int has_result = signature_args[signature][0] != 0;
8190 if (signature_args[signature][1] == 8
8191 && (insn_data[d->icode].operand[has_result].mode != Pmode))
8193 if (! TARGET_FPU_ANY
8194 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
8196 type = void_list_node;
8199 int arg = signature_args[signature][i];
8200 int opno = i - 1 + has_result;
8203 arg_type = ptr_type_node;
8205 arg_type = ((*lang_hooks.types.type_for_mode)
8206 (insn_data[d->icode].operand[opno].mode,
8211 arg_type = void_type_node;
8214 type = tree_cons (NULL_TREE, arg_type, type);
8216 type = build_function_type (arg_type, type);
8217 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8218 shared[signature] = type;
8220 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8229 sh_media_init_builtins ();
8232 /* Expand an expression EXP that calls a built-in function,
8233 with result going to TARGET if that's convenient
8234 (and in mode MODE if that's convenient).
8235 SUBTARGET may be used as the target for computing one of EXP's operands.
8236 IGNORE is nonzero if the value is to be ignored. */
8239 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8242 rtx subtarget ATTRIBUTE_UNUSED;
8243 enum machine_mode mode ATTRIBUTE_UNUSED;
8246 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8247 tree arglist = TREE_OPERAND (exp, 1);
8248 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8249 const struct builtin_description *d = &bdesc[fcode];
8250 enum insn_code icode = d->icode;
8251 int signature = d->signature;
8252 enum machine_mode tmode = VOIDmode;
8257 if (signature_args[signature][0])
8262 tmode = insn_data[icode].operand[0].mode;
8264 || GET_MODE (target) != tmode
8265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8266 target = gen_reg_rtx (tmode);
8272 for (i = 1; i <= 3; i++, nop++)
8275 enum machine_mode opmode, argmode;
8277 if (! signature_args[signature][i])
8279 arg = TREE_VALUE (arglist);
8280 if (arg == error_mark_node)
8282 arglist = TREE_CHAIN (arglist);
8283 opmode = insn_data[icode].operand[nop].mode;
8284 argmode = TYPE_MODE (TREE_TYPE (arg));
8285 if (argmode != opmode)
8286 arg = build1 (NOP_EXPR,
8287 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8288 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
8289 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8290 op[nop] = copy_to_mode_reg (opmode, op[nop]);
8296 pat = (*insn_data[d->icode].genfun) (op[0]);
8299 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8302 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8305 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
8317 sh_expand_unop_v2sf (code, op0, op1)
8321 rtx sel0 = const0_rtx;
8322 rtx sel1 = const1_rtx;
8323 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
8324 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8326 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8327 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
8331 sh_expand_binop_v2sf (code, op0, op1, op2)
8335 rtx sel0 = const0_rtx;
8336 rtx sel1 = const1_rtx;
8337 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
8339 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8341 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8342 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8345 /* Return the class of registers for which a mode change from FROM to TO
8348 sh_cannot_change_mode_class (from, to, class)
8349 enum machine_mode from, to;
8350 enum reg_class class;
8352 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8354 if (TARGET_LITTLE_ENDIAN)
8356 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8357 return reg_classes_intersect_p (DF_REGS, class);
8361 if (GET_MODE_SIZE (from) < 8)
8362 return reg_classes_intersect_p (DF_HI_REGS, class);
8369 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8370 that label is used. */
8373 sh_mark_label (address, nuses)
8377 if (GOTOFF_P (address))
8379 /* Extract the label or symbol. */
8380 address = XEXP (address, 0);
8381 if (GET_CODE (address) == PLUS)
8382 address = XEXP (address, 0);
8383 address = XVECEXP (address, 0, 0);
8385 if (GET_CODE (address) == LABEL_REF
8386 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8387 LABEL_NUSES (XEXP (address, 0)) += nuses;
8390 /* Compute extra cost of moving data between one register class
8393 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8394 uses this information. Hence, the general register <-> floating point
8395 register information here is not used for SFmode. */
8398 sh_register_move_cost (mode, srcclass, dstclass)
8399 enum machine_mode mode;
8400 enum reg_class srcclass, dstclass;
8402 if (dstclass == T_REGS || dstclass == PR_REGS)
8405 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8406 && REGCLASS_HAS_FP_REG (srcclass)
8407 && REGCLASS_HAS_FP_REG (dstclass))
8410 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
8411 || (dstclass== MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
8414 if ((REGCLASS_HAS_FP_REG (dstclass)
8415 && REGCLASS_HAS_GENERAL_REG (srcclass))
8416 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8417 && REGCLASS_HAS_FP_REG (srcclass)))
8418 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8419 * ((GET_MODE_SIZE (mode) + 7) / 8U));
8421 if ((dstclass == FPUL_REGS
8422 && REGCLASS_HAS_GENERAL_REG (srcclass))
8423 || (srcclass == FPUL_REGS
8424 && REGCLASS_HAS_GENERAL_REG (dstclass)))
8427 if ((dstclass == FPUL_REGS
8428 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8429 || (srcclass == FPUL_REGS
8430 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
8433 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8434 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8437 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8438 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8443 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8444 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8445 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
8447 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8450 /* Like register_operand, but take into account that SHMEDIA can use
8451 the constant zero like a general register. */
8453 sh_register_operand (op, mode)
8455 enum machine_mode mode;
8457 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8459 return register_operand (op, mode);
8462 static rtx emit_load_ptr PARAMS ((rtx, rtx));
8465 emit_load_ptr (reg, addr)
8468 rtx mem = gen_rtx_MEM (ptr_mode, addr);
8470 if (Pmode != ptr_mode)
8471 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8472 return emit_move_insn (reg, mem);
8476 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8478 tree thunk_fndecl ATTRIBUTE_UNUSED;
8479 HOST_WIDE_INT delta;
8480 HOST_WIDE_INT vcall_offset;
8483 CUMULATIVE_ARGS cum;
8484 int structure_value_byref = 0;
8485 rtx this, this_value, sibcall, insns, funexp;
8486 tree funtype = TREE_TYPE (function);
8487 int simple_add = CONST_OK_FOR_ADD (delta);
8489 rtx scratch0, scratch1, scratch2;
8491 reload_completed = 1;
8492 epilogue_completed = 1;
8494 current_function_uses_only_leaf_regs = 1;
8496 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8498 /* Find the "this" pointer. We have such a wide range of ABIs for the
8499 SH that it's best to do this completely machine independently.
8500 "this" is passed as first argument, unless a structure return pointer
8501 comes first, in which case "this" comes second. */
8502 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8503 #ifndef PCC_STATIC_STRUCT_RETURN
8504 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8505 structure_value_byref = 1;
8506 #endif /* not PCC_STATIC_STRUCT_RETURN */
8507 if (structure_value_byref && struct_value_rtx == 0)
8509 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8511 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8513 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8515 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8516 static chain pointer (even if you can't have nested virtual functions
8517 right now, someone might implement them sometime), and the rest of the
8518 registers are used for argument passing, are callee-saved, or reserved. */
8519 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
8522 scratch1 = gen_rtx_REG (ptr_mode, 1);
8523 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8524 pointing where to return struct values. */
8525 scratch2 = gen_rtx_REG (Pmode, 3);
8527 else if (TARGET_SHMEDIA)
8529 scratch1 = gen_rtx_REG (ptr_mode, 21);
8530 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
8533 this_value = plus_constant (this, delta);
8535 && (simple_add || scratch0 != scratch1)
8536 && strict_memory_address_p (ptr_mode, this_value))
8538 emit_load_ptr (scratch0, this_value);
8544 else if (simple_add)
8545 emit_move_insn (this, this_value);
8548 emit_move_insn (scratch1, GEN_INT (delta));
8549 emit_insn (gen_add2_insn (this, scratch1));
8557 emit_load_ptr (scratch0, this);
8559 offset_addr = plus_constant (scratch0, vcall_offset);
8560 if (strict_memory_address_p (ptr_mode, offset_addr))
8562 else if (! TARGET_SH5)
8564 /* scratch0 != scratch1, and we have indexed loads. Get better
8565 schedule by loading the offset into r1 and using an indexed
8566 load - then the load of r1 can issue before the load from
8567 (this + delta) finishes. */
8568 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8569 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
8571 else if (CONST_OK_FOR_ADD (vcall_offset))
8573 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8574 offset_addr = scratch0;
8576 else if (scratch0 != scratch1)
8578 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8579 emit_insn (gen_add2_insn (scratch0, scratch1));
8580 offset_addr = scratch0;
8583 abort (); /* FIXME */
8584 emit_load_ptr (scratch0, offset_addr);
8586 if (Pmode != ptr_mode)
8587 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8588 emit_insn (gen_add2_insn (this, scratch0));
8591 /* Generate a tail call to the target function. */
8592 if (! TREE_USED (function))
8594 assemble_external (function);
8595 TREE_USED (function) = 1;
8597 funexp = XEXP (DECL_RTL (function), 0);
8598 emit_move_insn (scratch2, funexp);
8599 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8600 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8601 SIBLING_CALL_P (sibcall) = 1;
8602 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8605 /* Run just enough of rest_of_compilation to do scheduling and get
8606 the insns emitted. Note that use_thunk calls
8607 assemble_start_function and assemble_end_function. */
8609 insn_locators_initialize ();
8610 insns = get_insns ();
8612 if (optimize > 0 && flag_schedule_insns_after_reload)
8615 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8616 life_analysis (insns, rtl_dump_file, PROP_FINAL);
8618 split_all_insns (1);
8620 schedule_insns (rtl_dump_file);
8625 if (optimize > 0 && flag_delayed_branch)
8626 dbr_schedule (insns, rtl_dump_file);
8627 shorten_branches (insns);
8628 final_start_function (insns, file, 1);
8629 final (insns, file, 1, 0);
8630 final_end_function ();
8632 if (optimize > 0 && flag_schedule_insns_after_reload)
8634 /* Release all memory allocated by flow. */
8635 free_basic_block_vars (0);
8637 /* Release all memory held by regsets now. */
8638 regset_release_memory ();
8641 reload_completed = 0;
8642 epilogue_completed = 0;
8647 function_symbol (const char *name)
8649 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
8650 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;