1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
54 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
55 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
57 /* These are some macros to abstract register modes. */
58 #define CONST_OK_FOR_ADD(size) \
59 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
60 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
61 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
62 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
64 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
65 int current_function_interrupt;
67 /* ??? The pragma interrupt support will not work for SH3. */
68 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
69 output code for the next function appropriate for an interrupt handler. */
72 /* This is set by the trap_exit attribute for functions. It specifies
73 a trap number to be used in a trapa instruction at function exit
74 (instead of an rte instruction). */
77 /* This is used by the sp_switch attribute for functions. It specifies
78 a variable holding the address of the stack the interrupt function
79 should switch to/from at entry/exit. */
82 /* This is set by #pragma trapa, and is similar to the above, except that
83 the compiler doesn't emit code to preserve all registers. */
84 static int pragma_trapa;
86 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
87 which has a separate set of low regs for User and Supervisor modes.
88 This should only be used for the lowest level of interrupts. Higher levels
89 of interrupts must save the registers in case they themselves are
91 int pragma_nosave_low_regs;
93 /* This is used for communication between SETUP_INCOMING_VARARGS and
94 sh_expand_prologue. */
95 int current_function_anonymous_args;
97 /* Global variables for machine-dependent things. */
99 /* Which cpu are we scheduling for. */
100 enum processor_type sh_cpu;
102 /* Saved operands from the last compare to use when we generate an scc
108 /* Provides the class number of the smallest class containing
111 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
113 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
150 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
154 char sh_register_names[FIRST_PSEUDO_REGISTER] \
155 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
157 char sh_additional_register_names[ADDREGNAMES_SIZE] \
158 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
159 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
161 /* Provide reg_class from a letter such as appears in the machine
162 description. *: target independently reserved letter.
163 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
165 enum reg_class reg_class_from_letter[] =
167 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
168 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
169 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
170 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
171 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
172 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
173 /* y */ FPUL_REGS, /* z */ R0_REGS
176 int assembler_dialect;
178 static void split_branches PARAMS ((rtx));
179 static int branch_dest PARAMS ((rtx));
180 static void force_into PARAMS ((rtx, rtx));
181 static void print_slot PARAMS ((rtx));
182 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
183 static void dump_table PARAMS ((rtx));
184 static int hi_const PARAMS ((rtx));
185 static int broken_move PARAMS ((rtx));
186 static int mova_p PARAMS ((rtx));
187 static rtx find_barrier PARAMS ((int, rtx, rtx));
188 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
189 static rtx gen_block_redirect PARAMS ((rtx, int, int));
190 static void sh_reorg PARAMS ((void));
191 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
192 static rtx frame_insn PARAMS ((rtx));
193 static rtx push PARAMS ((int));
194 static void pop PARAMS ((int));
195 static void push_regs PARAMS ((HARD_REG_SET *, int));
196 static int calc_live_regs PARAMS ((HARD_REG_SET *));
197 static void mark_use PARAMS ((rtx, rtx *));
198 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
199 static rtx mark_constant_pool_use PARAMS ((rtx));
200 const struct attribute_spec sh_attribute_table[];
201 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
202 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
203 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
204 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
205 static void sh_insert_attributes PARAMS ((tree, tree *));
206 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
207 static int sh_use_dfa_interface PARAMS ((void));
208 static int sh_issue_rate PARAMS ((void));
209 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
211 static bool sh_cannot_modify_jumps_p PARAMS ((void));
212 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
214 static void sh_init_builtins PARAMS ((void));
215 static void sh_media_init_builtins PARAMS ((void));
216 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
217 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
218 HOST_WIDE_INT, tree));
219 static int flow_dependent_p PARAMS ((rtx, rtx));
220 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
221 static int shiftcosts PARAMS ((rtx));
222 static int andcosts PARAMS ((rtx));
223 static int addsubcosts PARAMS ((rtx));
224 static int multcosts PARAMS ((rtx));
225 static bool unspec_caller_rtx_p PARAMS ((rtx));
226 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
227 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
228 static int sh_address_cost PARAMS ((rtx));
230 /* Initialize the GCC target structure. */
231 #undef TARGET_ATTRIBUTE_TABLE
232 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
234 /* The next two are used for debug info when compiling with -gdwarf. */
235 #undef TARGET_ASM_UNALIGNED_HI_OP
236 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
237 #undef TARGET_ASM_UNALIGNED_SI_OP
238 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
240 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
241 #undef TARGET_ASM_UNALIGNED_DI_OP
242 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
243 #undef TARGET_ASM_ALIGNED_DI_OP
244 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
246 #undef TARGET_ASM_FUNCTION_EPILOGUE
247 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
249 #undef TARGET_ASM_OUTPUT_MI_THUNK
250 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
252 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
253 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
255 #undef TARGET_INSERT_ATTRIBUTES
256 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
258 #undef TARGET_SCHED_ADJUST_COST
259 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
261 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
262 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
264 #undef TARGET_SCHED_ISSUE_RATE
265 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
267 #undef TARGET_CANNOT_MODIFY_JUMPS_P
268 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
270 #undef TARGET_MS_BITFIELD_LAYOUT_P
271 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
273 #undef TARGET_INIT_BUILTINS
274 #define TARGET_INIT_BUILTINS sh_init_builtins
275 #undef TARGET_EXPAND_BUILTIN
276 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
278 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
279 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
281 #undef TARGET_CANNOT_COPY_INSN_P
282 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
283 #undef TARGET_RTX_COSTS
284 #define TARGET_RTX_COSTS sh_rtx_costs
285 #undef TARGET_ADDRESS_COST
286 #define TARGET_ADDRESS_COST sh_address_cost
288 #undef TARGET_MACHINE_DEPENDENT_REORG
289 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
292 #undef TARGET_HAVE_TLS
293 #define TARGET_HAVE_TLS true
296 struct gcc_target targetm = TARGET_INITIALIZER;
298 /* Print the operand address in x to the stream. */
301 print_operand_address (stream, x)
305 switch (GET_CODE (x))
309 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
314 rtx base = XEXP (x, 0);
315 rtx index = XEXP (x, 1);
317 switch (GET_CODE (index))
320 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
321 reg_names[true_regnum (base)]);
327 int base_num = true_regnum (base);
328 int index_num = true_regnum (index);
330 fprintf (stream, "@(r0,%s)",
331 reg_names[MAX (base_num, index_num)]);
343 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
347 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
351 x = mark_constant_pool_use (x);
352 output_addr_const (stream, x);
357 /* Print operand x (an rtx) in assembler syntax to file stream
358 according to modifier code.
360 '.' print a .s if insn needs delay slot
361 ',' print LOCAL_LABEL_PREFIX
362 '@' print trap, rte or rts depending upon pragma interruptness
363 '#' output a nop if there is nothing to put in the delay slot
364 ''' print likelyhood suffix (/u for unlikely).
365 'O' print a constant without the #
366 'R' print the LSW of a dp value - changes if in little endian
367 'S' print the MSW of a dp value - changes if in little endian
368 'T' print the next word of a dp value - same as 'R' in big endian mode.
369 'M' print an `x' if `m' will print `base,index'.
370 'N' print 'r63' if the operand is (const_int 0).
371 'm' print a pair `base,offset' or `base,index', for LD and ST.
372 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
373 'o' output an operator. */
376 print_operand (stream, x, code)
385 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
386 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
387 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
390 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
394 fprintf (stream, "trapa #%d", trap_exit);
395 else if (sh_cfun_interrupt_handler_p ())
396 fprintf (stream, "rte");
398 fprintf (stream, "rts");
401 /* Output a nop if there's nothing in the delay slot. */
402 if (dbr_sequence_length () == 0)
403 fprintf (stream, "\n\tnop");
407 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
409 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
410 fputs ("/u", stream);
414 x = mark_constant_pool_use (x);
415 output_addr_const (stream, x);
418 fputs (reg_names[REGNO (x) + LSW], (stream));
421 fputs (reg_names[REGNO (x) + MSW], (stream));
424 /* Next word of a double. */
425 switch (GET_CODE (x))
428 fputs (reg_names[REGNO (x) + 1], (stream));
431 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
432 && GET_CODE (XEXP (x, 0)) != POST_INC)
433 x = adjust_address (x, SImode, 4);
434 print_operand_address (stream, XEXP (x, 0));
441 switch (GET_CODE (x))
443 case PLUS: fputs ("add", stream); break;
444 case MINUS: fputs ("sub", stream); break;
445 case MULT: fputs ("mul", stream); break;
446 case DIV: fputs ("div", stream); break;
447 case EQ: fputs ("eq", stream); break;
448 case NE: fputs ("ne", stream); break;
449 case GT: case LT: fputs ("gt", stream); break;
450 case GE: case LE: fputs ("ge", stream); break;
451 case GTU: case LTU: fputs ("gtu", stream); break;
452 case GEU: case LEU: fputs ("geu", stream); break;
458 if (GET_CODE (x) == MEM
459 && GET_CODE (XEXP (x, 0)) == PLUS
460 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
461 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
466 if (GET_CODE (x) != MEM)
469 switch (GET_CODE (x))
473 print_operand (stream, x, 0);
474 fputs (", 0", stream);
478 print_operand (stream, XEXP (x, 0), 0);
479 fputs (", ", stream);
480 print_operand (stream, XEXP (x, 1), 0);
489 if (x == CONST0_RTX (GET_MODE (x)))
491 fprintf ((stream), "r63");
496 if (GET_CODE (x) == CONST_INT)
498 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
505 switch (GET_CODE (x))
507 /* FIXME: We need this on SHmedia32 because reload generates
508 some sign-extended HI or QI loads into DImode registers
509 but, because Pmode is SImode, the address ends up with a
510 subreg:SI of the DImode register. Maybe reload should be
511 fixed so as to apply alter_subreg to such loads? */
513 if (SUBREG_BYTE (x) != 0
514 || GET_CODE (SUBREG_REG (x)) != REG)
521 if (FP_REGISTER_P (REGNO (x))
522 && GET_MODE (x) == V16SFmode)
523 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
524 else if (FP_REGISTER_P (REGNO (x))
525 && GET_MODE (x) == V4SFmode)
526 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
527 else if (GET_CODE (x) == REG
528 && GET_MODE (x) == V2SFmode)
529 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
530 else if (FP_REGISTER_P (REGNO (x))
531 && GET_MODE_SIZE (GET_MODE (x)) > 4)
532 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
534 fputs (reg_names[REGNO (x)], (stream));
538 output_address (XEXP (x, 0));
543 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
544 && GET_MODE (XEXP (x, 0)) == DImode
545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
546 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
548 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
551 if (GET_CODE (val) == ASHIFTRT)
554 if (GET_CODE (XEXP (val, 0)) == CONST)
556 output_addr_const (stream, XEXP (val, 0));
557 if (GET_CODE (XEXP (val, 0)) == CONST)
559 fputs (" >> ", stream);
560 output_addr_const (stream, XEXP (val, 1));
565 if (GET_CODE (val) == CONST)
567 output_addr_const (stream, val);
568 if (GET_CODE (val) == CONST)
571 fputs (" & 65535)", stream);
579 output_addr_const (stream, x);
586 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
588 force_into (value, target)
591 value = force_operand (value, target);
592 if (! rtx_equal_p (value, target))
593 emit_insn (gen_move_insn (target, value));
596 /* Emit code to perform a block move. Choose the best method.
598 OPERANDS[0] is the destination.
599 OPERANDS[1] is the source.
600 OPERANDS[2] is the size.
601 OPERANDS[3] is the alignment safe to use. */
604 expand_block_move (operands)
607 int align = INTVAL (operands[3]);
608 int constp = (GET_CODE (operands[2]) == CONST_INT);
609 int bytes = (constp ? INTVAL (operands[2]) : 0);
611 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
612 alignment, or if it isn't a multiple of 4 bytes, then fail. */
613 if (! constp || align < 4 || (bytes % 4 != 0))
620 else if (bytes == 12)
625 rtx r4 = gen_rtx (REG, SImode, 4);
626 rtx r5 = gen_rtx (REG, SImode, 5);
628 entry_name = get_identifier ("__movstrSI12_i4");
630 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
631 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
632 force_into (XEXP (operands[0], 0), r4);
633 force_into (XEXP (operands[1], 0), r5);
634 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
637 else if (! TARGET_SMALLCODE)
643 rtx r4 = gen_rtx (REG, SImode, 4);
644 rtx r5 = gen_rtx (REG, SImode, 5);
645 rtx r6 = gen_rtx (REG, SImode, 6);
647 entry_name = get_identifier (bytes & 4
649 : "__movstr_i4_even");
650 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
651 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
652 force_into (XEXP (operands[0], 0), r4);
653 force_into (XEXP (operands[1], 0), r5);
656 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
657 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
669 rtx r4 = gen_rtx_REG (SImode, 4);
670 rtx r5 = gen_rtx_REG (SImode, 5);
672 sprintf (entry, "__movstrSI%d", bytes);
673 entry_name = get_identifier (entry);
674 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
675 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
676 force_into (XEXP (operands[0], 0), r4);
677 force_into (XEXP (operands[1], 0), r5);
678 emit_insn (gen_block_move_real (func_addr_rtx));
682 /* This is the same number of bytes as a memcpy call, but to a different
683 less common function name, so this will occasionally use more space. */
684 if (! TARGET_SMALLCODE)
689 int final_switch, while_loop;
690 rtx r4 = gen_rtx_REG (SImode, 4);
691 rtx r5 = gen_rtx_REG (SImode, 5);
692 rtx r6 = gen_rtx_REG (SImode, 6);
694 entry_name = get_identifier ("__movstr");
695 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
696 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
697 force_into (XEXP (operands[0], 0), r4);
698 force_into (XEXP (operands[1], 0), r5);
700 /* r6 controls the size of the move. 16 is decremented from it
701 for each 64 bytes moved. Then the negative bit left over is used
702 as an index into a list of move instructions. e.g., a 72 byte move
703 would be set up with size(r6) = 14, for one iteration through the
704 big while loop, and a switch of -2 for the last part. */
706 final_switch = 16 - ((bytes / 4) % 16);
707 while_loop = ((bytes / 4) / 16 - 1) * 16;
708 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
709 emit_insn (gen_block_lump_real (func_addr_rtx));
716 /* Prepare operands for a move define_expand; specifically, one of the
717 operands must be in a register. */
720 prepare_move_operands (operands, mode)
722 enum machine_mode mode;
724 if ((mode == SImode || mode == DImode)
726 && ! ((mode == Pmode || mode == ptr_mode)
727 && tls_symbolic_operand (operands[1], Pmode) != 0))
730 if (SYMBOLIC_CONST_P (operands[1]))
732 if (GET_CODE (operands[0]) == MEM)
733 operands[1] = force_reg (Pmode, operands[1]);
734 else if (TARGET_SHMEDIA
735 && GET_CODE (operands[1]) == LABEL_REF
736 && target_reg_operand (operands[0], mode))
740 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
741 operands[1] = legitimize_pic_address (operands[1], mode, temp);
744 else if (GET_CODE (operands[1]) == CONST
745 && GET_CODE (XEXP (operands[1], 0)) == PLUS
746 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
748 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
749 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
751 operands[1] = expand_binop (mode, add_optab, temp,
752 XEXP (XEXP (operands[1], 0), 1),
753 no_new_pseudos ? temp
754 : gen_reg_rtx (Pmode),
759 if (! reload_in_progress && ! reload_completed)
761 /* Copy the source to a register if both operands aren't registers. */
762 if (! register_operand (operands[0], mode)
763 && ! sh_register_operand (operands[1], mode))
764 operands[1] = copy_to_mode_reg (mode, operands[1]);
766 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
768 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
769 except that we can't use that function because it is static. */
770 rtx new = change_address (operands[0], mode, 0);
771 MEM_COPY_ATTRIBUTES (new, operands[0]);
775 /* This case can happen while generating code to move the result
776 of a library call to the target. Reject `st r0,@(rX,rY)' because
777 reload will fail to find a spill register for rX, since r0 is already
778 being used for the source. */
779 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
780 && GET_CODE (operands[0]) == MEM
781 && GET_CODE (XEXP (operands[0], 0)) == PLUS
782 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
783 operands[1] = copy_to_mode_reg (mode, operands[1]);
786 if (mode == Pmode || mode == ptr_mode)
789 enum tls_model tls_kind;
793 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
795 rtx tga_op1, tga_ret, tmp, tmp2;
800 case TLS_MODEL_GLOBAL_DYNAMIC:
801 tga_ret = gen_rtx_REG (Pmode, R0_REG);
802 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
806 case TLS_MODEL_LOCAL_DYNAMIC:
807 tga_ret = gen_rtx_REG (Pmode, R0_REG);
808 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
810 tmp = gen_reg_rtx (Pmode);
811 emit_move_insn (tmp, tga_ret);
813 if (register_operand (op0, Pmode))
816 tmp2 = gen_reg_rtx (Pmode);
818 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
822 case TLS_MODEL_INITIAL_EXEC:
824 emit_insn (gen_GOTaddr2picreg ());
825 tga_op1 = gen_reg_rtx (Pmode);
826 tmp = gen_sym2GOTTPOFF (op1);
827 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
831 case TLS_MODEL_LOCAL_EXEC:
832 tmp2 = gen_reg_rtx (Pmode);
833 emit_insn (gen_load_gbr (tmp2));
834 tmp = gen_reg_rtx (Pmode);
835 emit_insn (gen_symTPOFF2reg (tmp, op1));
836 RTX_UNCHANGING_P (tmp) = 1;
838 if (register_operand (op0, Pmode))
841 op1 = gen_reg_rtx (Pmode);
843 emit_insn (gen_addsi3 (op1, tmp, tmp2));
856 /* Prepare the operands for an scc instruction; make sure that the
857 compare has been done. */
859 prepare_scc_operands (code)
862 rtx t_reg = gen_rtx_REG (SImode, T_REG);
863 enum rtx_code oldcode = code;
864 enum machine_mode mode;
866 /* First need a compare insn. */
870 /* It isn't possible to handle this case. */
889 rtx tmp = sh_compare_op0;
890 sh_compare_op0 = sh_compare_op1;
891 sh_compare_op1 = tmp;
894 mode = GET_MODE (sh_compare_op0);
895 if (mode == VOIDmode)
896 mode = GET_MODE (sh_compare_op1);
898 sh_compare_op0 = force_reg (mode, sh_compare_op0);
899 if ((code != EQ && code != NE
900 && (sh_compare_op1 != const0_rtx
901 || code == GTU || code == GEU || code == LTU || code == LEU))
902 || (mode == DImode && sh_compare_op1 != const0_rtx)
903 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
904 sh_compare_op1 = force_reg (mode, sh_compare_op1);
906 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
907 (mode == SFmode ? emit_sf_insn : emit_df_insn)
908 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
909 gen_rtx (SET, VOIDmode, t_reg,
910 gen_rtx (code, SImode,
911 sh_compare_op0, sh_compare_op1)),
912 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
914 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
915 gen_rtx (code, SImode, sh_compare_op0,
921 /* Called from the md file, set up the operands of a compare instruction. */
924 from_compare (operands, code)
928 enum machine_mode mode = GET_MODE (sh_compare_op0);
930 if (mode == VOIDmode)
931 mode = GET_MODE (sh_compare_op1);
934 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
936 /* Force args into regs, since we can't use constants here. */
937 sh_compare_op0 = force_reg (mode, sh_compare_op0);
938 if (sh_compare_op1 != const0_rtx
939 || code == GTU || code == GEU
940 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
941 sh_compare_op1 = force_reg (mode, sh_compare_op1);
943 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
945 from_compare (operands, GT);
946 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
949 insn = gen_rtx_SET (VOIDmode,
950 gen_rtx_REG (SImode, T_REG),
951 gen_rtx (code, SImode, sh_compare_op0,
953 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
955 insn = gen_rtx (PARALLEL, VOIDmode,
957 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
958 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
964 /* Functions to output assembly code. */
966 /* Return a sequence of instructions to perform DI or DF move.
968 Since the SH cannot move a DI or DF in one instruction, we have
969 to take care when we see overlapping source and dest registers. */
972 output_movedouble (insn, operands, mode)
973 rtx insn ATTRIBUTE_UNUSED;
975 enum machine_mode mode;
977 rtx dst = operands[0];
978 rtx src = operands[1];
980 if (GET_CODE (dst) == MEM
981 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
982 return "mov.l %T1,%0\n\tmov.l %1,%0";
984 if (register_operand (dst, mode)
985 && register_operand (src, mode))
987 if (REGNO (src) == MACH_REG)
988 return "sts mach,%S0\n\tsts macl,%R0";
990 /* When mov.d r1,r2 do r2->r3 then r1->r2;
991 when mov.d r1,r0 do r1->r0 then r2->r1. */
993 if (REGNO (src) + 1 == REGNO (dst))
994 return "mov %T1,%T0\n\tmov %1,%0";
996 return "mov %1,%0\n\tmov %T1,%T0";
998 else if (GET_CODE (src) == CONST_INT)
1000 if (INTVAL (src) < 0)
1001 output_asm_insn ("mov #-1,%S0", operands);
1003 output_asm_insn ("mov #0,%S0", operands);
1005 return "mov %1,%R0";
1007 else if (GET_CODE (src) == MEM)
1010 int dreg = REGNO (dst);
1011 rtx inside = XEXP (src, 0);
1013 if (GET_CODE (inside) == REG)
1014 ptrreg = REGNO (inside);
1015 else if (GET_CODE (inside) == SUBREG)
1016 ptrreg = subreg_regno (inside);
1017 else if (GET_CODE (inside) == PLUS)
1019 ptrreg = REGNO (XEXP (inside, 0));
1020 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1021 an offsettable address. Unfortunately, offsettable addresses use
1022 QImode to check the offset, and a QImode offsettable address
1023 requires r0 for the other operand, which is not currently
1024 supported, so we can't use the 'o' constraint.
1025 Thus we must check for and handle r0+REG addresses here.
1026 We punt for now, since this is likely very rare. */
1027 if (GET_CODE (XEXP (inside, 1)) == REG)
1030 else if (GET_CODE (inside) == LABEL_REF)
1031 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1032 else if (GET_CODE (inside) == POST_INC)
1033 return "mov.l %1,%0\n\tmov.l %1,%T0";
1037 /* Work out the safe way to copy. Copy into the second half first. */
1039 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1042 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1045 /* Print an instruction which would have gone into a delay slot after
1046 another instruction, but couldn't because the other instruction expanded
1047 into a sequence where putting the slot insn at the end wouldn't work. */
1053 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
1055 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1059 output_far_jump (insn, op)
1063 struct { rtx lab, reg, op; } this;
1064 rtx braf_base_lab = NULL_RTX;
1067 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1070 this.lab = gen_label_rtx ();
1074 && offset - get_attr_length (insn) <= 32766)
1077 jump = "mov.w %O0,%1; braf %1";
1085 jump = "mov.l %O0,%1; braf %1";
1087 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1090 jump = "mov.l %O0,%1; jmp @%1";
1092 /* If we have a scratch register available, use it. */
1093 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1094 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1096 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1097 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1098 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1099 output_asm_insn (jump, &this.lab);
1100 if (dbr_sequence_length ())
1101 print_slot (final_sequence);
1103 output_asm_insn ("nop", 0);
1107 /* Output the delay slot insn first if any. */
1108 if (dbr_sequence_length ())
1109 print_slot (final_sequence);
1111 this.reg = gen_rtx_REG (SImode, 13);
1112 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1113 Fortunately, MACL is fixed and call-clobbered, and we never
1114 need its value across jumps, so save r13 in it instead of in
1117 output_asm_insn ("lds r13, macl", 0);
1119 output_asm_insn ("mov.l r13,@-r15", 0);
1120 output_asm_insn (jump, &this.lab);
1122 output_asm_insn ("sts macl, r13", 0);
1124 output_asm_insn ("mov.l @r15+,r13", 0);
1126 if (far && flag_pic && TARGET_SH2)
1128 braf_base_lab = gen_label_rtx ();
1129 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1130 CODE_LABEL_NUMBER (braf_base_lab));
1133 output_asm_insn (".align 2", 0);
1134 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1136 if (far && flag_pic)
1139 this.lab = braf_base_lab;
1140 output_asm_insn (".long %O2-%O0", &this.lab);
1143 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1147 /* Local label counter, used for constants in the pool and inside
1148 pattern branches. */
1150 static int lf = 100;
1152 /* Output code for ordinary branches. */
1155 output_branch (logic, insn, operands)
1160 switch (get_attr_length (insn))
1163 /* This can happen if filling the delay slot has caused a forward
1164 branch to exceed its range (we could reverse it, but only
1165 when we know we won't overextend other branches; this should
1166 best be handled by relaxation).
1167 It can also happen when other condbranches hoist delay slot insn
1168 from their destination, thus leading to code size increase.
1169 But the branch will still be in the range -4092..+4098 bytes. */
1174 /* The call to print_slot will clobber the operands. */
1175 rtx op0 = operands[0];
1177 /* If the instruction in the delay slot is annulled (true), then
1178 there is no delay slot where we can put it now. The only safe
1179 place for it is after the label. final will do that by default. */
1182 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1184 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1185 ASSEMBLER_DIALECT ? "/" : ".", label);
1186 print_slot (final_sequence);
1189 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1191 output_asm_insn ("bra\t%l0", &op0);
1192 fprintf (asm_out_file, "\tnop\n");
1193 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1197 /* When relaxing, handle this like a short branch. The linker
1198 will fix it up if it still doesn't fit after relaxation. */
1200 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1202 /* These are for SH2e, in which we have to account for the
1203 extra nop because of the hardware bug in annulled branches. */
1210 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1212 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1214 ASSEMBLER_DIALECT ? "/" : ".", label);
1215 fprintf (asm_out_file, "\tnop\n");
1216 output_asm_insn ("bra\t%l0", operands);
1217 fprintf (asm_out_file, "\tnop\n");
1218 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1222 /* When relaxing, fall through. */
1227 sprintf (buffer, "b%s%ss\t%%l0",
1229 ASSEMBLER_DIALECT ? "/" : ".");
1230 output_asm_insn (buffer, &operands[0]);
1235 /* There should be no longer branches now - that would
1236 indicate that something has destroyed the branches set
1237 up in machine_dependent_reorg. */
1243 output_branchy_insn (code, template, insn, operands)
1245 const char *template;
1249 rtx next_insn = NEXT_INSN (insn);
1251 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1253 rtx src = SET_SRC (PATTERN (next_insn));
1254 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1256 /* Following branch not taken */
1257 operands[9] = gen_label_rtx ();
1258 emit_label_after (operands[9], next_insn);
1259 INSN_ADDRESSES_NEW (operands[9],
1260 INSN_ADDRESSES (INSN_UID (next_insn))
1261 + get_attr_length (next_insn));
1266 int offset = (branch_dest (next_insn)
1267 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1268 if (offset >= -252 && offset <= 258)
1270 if (GET_CODE (src) == IF_THEN_ELSE)
1272 src = XEXP (src, 1);
1278 operands[9] = gen_label_rtx ();
1279 emit_label_after (operands[9], insn);
1280 INSN_ADDRESSES_NEW (operands[9],
1281 INSN_ADDRESSES (INSN_UID (insn))
1282 + get_attr_length (insn));
1287 output_ieee_ccmpeq (insn, operands)
1288 rtx insn, *operands;
1290 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1293 /* Output to FILE the start of the assembler file. */
1296 output_file_start (file)
1299 output_file_directive (file, main_input_filename);
1301 /* Switch to the data section so that the coffsem symbol
1302 isn't in the text section. */
1305 if (TARGET_LITTLE_ENDIAN)
1306 fprintf (file, "\t.little\n");
1308 if (TARGET_SHCOMPACT)
1309 fprintf (file, "\t.mode\tSHcompact\n");
1310 else if (TARGET_SHMEDIA)
1311 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1312 TARGET_SHMEDIA64 ? 64 : 32);
1315 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1318 unspec_caller_rtx_p (pat)
1321 switch (GET_CODE (pat))
1324 return unspec_caller_rtx_p (XEXP (pat, 0));
1327 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1329 return unspec_caller_rtx_p (XEXP (pat, 1));
1331 if (XINT (pat, 1) == UNSPEC_CALLER)
1340 /* Indicate that INSN cannot be duplicated. This is true for insn
1341 that generates an unique label. */
1344 sh_cannot_copy_insn_p (insn)
1349 if (!reload_completed || !flag_pic)
1352 if (GET_CODE (insn) != INSN)
1354 if (asm_noperands (insn) >= 0)
1357 pat = PATTERN (insn);
1358 if (GET_CODE (pat) != SET)
1360 pat = SET_SRC (pat);
1362 if (unspec_caller_rtx_p (pat))
1368 /* Actual number of instructions used to make a shift by N. */
1369 static const char ashiftrt_insns[] =
1370 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1372 /* Left shift and logical right shift are the same. */
1373 static const char shift_insns[] =
1374 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1376 /* Individual shift amounts needed to get the above length sequences.
1377 One bit right shifts clobber the T bit, so when possible, put one bit
1378 shifts in the middle of the sequence, so the ends are eligible for
1379 branch delay slots. */
1380 static const short shift_amounts[32][5] = {
1381 {0}, {1}, {2}, {2, 1},
1382 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1383 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1384 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1385 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1386 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1387 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1388 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1390 /* Likewise, but for shift amounts < 16, up to three highmost bits
1391 might be clobbered. This is typically used when combined with some
1392 kind of sign or zero extension. */
1394 static const char ext_shift_insns[] =
1395 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1397 static const short ext_shift_amounts[32][4] = {
1398 {0}, {1}, {2}, {2, 1},
1399 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1400 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1401 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1402 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1403 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1404 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1405 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1407 /* Assuming we have a value that has been sign-extended by at least one bit,
1408 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1409 to shift it by N without data loss, and quicker than by other means? */
1410 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1412 /* This is used in length attributes in sh.md to help compute the length
1413 of arbitrary constant shift instructions. */
1416 shift_insns_rtx (insn)
1419 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1420 int shift_count = INTVAL (XEXP (set_src, 1));
1421 enum rtx_code shift_code = GET_CODE (set_src);
1426 return ashiftrt_insns[shift_count];
1429 return shift_insns[shift_count];
1435 /* Return the cost of a shift. */
1446 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1448 if (GET_MODE (x) == DImode
1449 && GET_CODE (XEXP (x, 1)) == CONST_INT
1450 && INTVAL (XEXP (x, 1)) == 1)
1453 /* Everything else is invalid, because there is no pattern for it. */
1456 /* If shift by a non constant, then this will be expensive. */
1457 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1458 return SH_DYNAMIC_SHIFT_COST;
1460 value = INTVAL (XEXP (x, 1));
1462 /* Otherwise, return the true cost in instructions. */
1463 if (GET_CODE (x) == ASHIFTRT)
1465 int cost = ashiftrt_insns[value];
1466 /* If SH3, then we put the constant in a reg and use shad. */
1467 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1468 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1472 return shift_insns[value];
1475 /* Return the cost of an AND operation. */
1483 /* Anding with a register is a single cycle and instruction. */
1484 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1487 i = INTVAL (XEXP (x, 1));
1491 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1492 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1493 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1499 /* These constants are single cycle extu.[bw] instructions. */
1500 if (i == 0xff || i == 0xffff)
1502 /* Constants that can be used in an and immediate instruction in a single
1503 cycle, but this requires r0, so make it a little more expensive. */
1504 if (CONST_OK_FOR_K08 (i))
1506 /* Constants that can be loaded with a mov immediate and an and.
1507 This case is probably unnecessary. */
1508 if (CONST_OK_FOR_I08 (i))
1510 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1511 This case is probably unnecessary. */
1515 /* Return the cost of an addition or a subtraction. */
1521 /* Adding a register is a single cycle insn. */
1522 if (GET_CODE (XEXP (x, 1)) == REG
1523 || GET_CODE (XEXP (x, 1)) == SUBREG)
1526 /* Likewise for small constants. */
1527 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1528 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1532 switch (GET_CODE (XEXP (x, 1)))
1537 return TARGET_SHMEDIA64 ? 5 : 3;
1540 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1542 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1544 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1552 /* Any other constant requires a 2 cycle pc-relative load plus an
1557 /* Return the cost of a multiply. */
1560 rtx x ATTRIBUTE_UNUSED;
1567 /* We have a mul insn, so we can never take more than the mul and the
1568 read of the mac reg, but count more because of the latency and extra
1570 if (TARGET_SMALLCODE)
1575 /* If we're aiming at small code, then just count the number of
1576 insns in a multiply call sequence. */
1577 if (TARGET_SMALLCODE)
1580 /* Otherwise count all the insns in the routine we'd be calling too. */
1584 /* Compute a (partial) cost for rtx X. Return true if the complete
1585 cost has been computed, and false if subexpressions should be
1586 scanned. In either case, *TOTAL contains the cost result. */
1589 sh_rtx_costs (x, code, outer_code, total)
1591 int code, outer_code, *total;
1598 if (INTVAL (x) == 0)
1600 else if (outer_code == AND && and_operand ((x), DImode))
1602 else if ((outer_code == IOR || outer_code == XOR
1603 || outer_code == PLUS)
1604 && CONST_OK_FOR_I10 (INTVAL (x)))
1606 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1607 *total = COSTS_N_INSNS (outer_code != SET);
1608 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1609 *total = COSTS_N_INSNS (2);
1610 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1611 *total = COSTS_N_INSNS (3);
1613 *total = COSTS_N_INSNS (4);
1616 if (CONST_OK_FOR_I08 (INTVAL (x)))
1618 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1619 && CONST_OK_FOR_K08 (INTVAL (x)))
1628 if (TARGET_SHMEDIA64)
1629 *total = COSTS_N_INSNS (4);
1630 else if (TARGET_SHMEDIA32)
1631 *total = COSTS_N_INSNS (2);
1638 *total = COSTS_N_INSNS (4);
1644 *total = COSTS_N_INSNS (addsubcosts (x));
1648 *total = COSTS_N_INSNS (andcosts (x));
1652 *total = COSTS_N_INSNS (multcosts (x));
1658 *total = COSTS_N_INSNS (shiftcosts (x));
1665 *total = COSTS_N_INSNS (20);
1678 /* Compute the cost of an address. For the SH, all valid addresses are
1679 the same cost. Use a slightly higher cost for reg + reg addressing,
1680 since it increases pressure on r0. */
1686 return (GET_CODE (X) == PLUS
1687 && ! CONSTANT_P (XEXP (X, 1))
1688 && ! TARGET_SHMEDIA ? 1 : 0);
1691 /* Code to expand a shift. */
1694 gen_ashift (type, n, reg)
1699 /* Negative values here come from the shift_amounts array. */
1712 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1716 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1718 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1721 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1726 /* Same for HImode */
1729 gen_ashift_hi (type, n, reg)
1734 /* Negative values here come from the shift_amounts array. */
1748 /* We don't have HImode right shift operations because using the
1749 ordinary 32 bit shift instructions for that doesn't generate proper
1750 zero/sign extension.
1751 gen_ashift_hi is only called in contexts where we know that the
1752 sign extension works out correctly. */
1755 if (GET_CODE (reg) == SUBREG)
1757 offset = SUBREG_BYTE (reg);
1758 reg = SUBREG_REG (reg);
1760 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1764 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1769 /* Output RTL to split a constant shift into its component SH constant
1770 shift instructions. */
1773 gen_shifty_op (code, operands)
1777 int value = INTVAL (operands[2]);
1780 /* Truncate the shift count in case it is out of bounds. */
1781 value = value & 0x1f;
1785 if (code == LSHIFTRT)
1787 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1788 emit_insn (gen_movt (operands[0]));
1791 else if (code == ASHIFT)
1793 /* There is a two instruction sequence for 31 bit left shifts,
1794 but it requires r0. */
1795 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1797 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1798 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1803 else if (value == 0)
1805 /* This can happen when not optimizing. We must output something here
1806 to prevent the compiler from aborting in final.c after the try_split
1808 emit_insn (gen_nop ());
1812 max = shift_insns[value];
1813 for (i = 0; i < max; i++)
1814 gen_ashift (code, shift_amounts[value][i], operands[0]);
1817 /* Same as above, but optimized for values where the topmost bits don't
1821 gen_shifty_hi_op (code, operands)
1825 int value = INTVAL (operands[2]);
1827 void (*gen_fun) PARAMS ((int, int, rtx));
1829 /* This operation is used by and_shl for SImode values with a few
1830 high bits known to be cleared. */
1834 emit_insn (gen_nop ());
1838 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1841 max = ext_shift_insns[value];
1842 for (i = 0; i < max; i++)
1843 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1846 /* When shifting right, emit the shifts in reverse order, so that
1847 solitary negative values come first. */
1848 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1849 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1852 /* Output RTL for an arithmetic right shift. */
1854 /* ??? Rewrite to use super-optimizer sequences. */
1857 expand_ashiftrt (operands)
1868 if (GET_CODE (operands[2]) != CONST_INT)
1870 rtx count = copy_to_mode_reg (SImode, operands[2]);
1871 emit_insn (gen_negsi2 (count, count));
1872 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1875 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1876 > 1 + SH_DYNAMIC_SHIFT_COST)
1879 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1880 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1884 if (GET_CODE (operands[2]) != CONST_INT)
1887 value = INTVAL (operands[2]) & 31;
1891 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1894 else if (value >= 16 && value <= 19)
1896 wrk = gen_reg_rtx (SImode);
1897 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1900 gen_ashift (ASHIFTRT, 1, wrk);
1901 emit_move_insn (operands[0], wrk);
1904 /* Expand a short sequence inline, longer call a magic routine. */
1905 else if (value <= 5)
1907 wrk = gen_reg_rtx (SImode);
1908 emit_move_insn (wrk, operands[1]);
1910 gen_ashift (ASHIFTRT, 1, wrk);
1911 emit_move_insn (operands[0], wrk);
1915 wrk = gen_reg_rtx (Pmode);
1917 /* Load the value into an arg reg and call a helper. */
1918 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1919 sprintf (func, "__ashiftrt_r4_%d", value);
1920 func_name = get_identifier (func);
1921 sym = function_symbol (IDENTIFIER_POINTER (func_name));
1922 emit_move_insn (wrk, sym);
1923 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1924 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1929 sh_dynamicalize_shift_p (count)
1932 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1935 /* Try to find a good way to implement the combiner pattern
1936 [(set (match_operand:SI 0 "register_operand" "r")
1937 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1938 (match_operand:SI 2 "const_int_operand" "n"))
1939 (match_operand:SI 3 "const_int_operand" "n"))) .
1940 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1941 return 0 for simple right / left or left/right shift combination.
1942 return 1 for a combination of shifts with zero_extend.
1943 return 2 for a combination of shifts with an AND that needs r0.
1944 return 3 for a combination of shifts with an AND that needs an extra
1945 scratch register, when the three highmost bits of the AND mask are clear.
1946 return 4 for a combination of shifts with an AND that needs an extra
1947 scratch register, when any of the three highmost bits of the AND mask
1949 If ATTRP is set, store an initial right shift width in ATTRP[0],
1950 and the instruction length in ATTRP[1] . These values are not valid
1952 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1953 shift_amounts for the last shift value that is to be used before the
1956 shl_and_kind (left_rtx, mask_rtx, attrp)
1957 rtx left_rtx, mask_rtx;
1960 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1961 int left = INTVAL (left_rtx), right;
1963 int cost, best_cost = 10000;
1964 int best_right = 0, best_len = 0;
1968 if (left < 0 || left > 31)
1970 if (GET_CODE (mask_rtx) == CONST_INT)
1971 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1973 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1974 /* Can this be expressed as a right shift / left shift pair ? */
1975 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1976 right = exact_log2 (lsb);
1977 mask2 = ~(mask + lsb - 1);
1978 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1979 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1981 best_cost = shift_insns[right] + shift_insns[right + left];
1982 /* mask has no trailing zeroes <==> ! right */
1983 else if (! right && mask2 == ~(lsb2 - 1))
1985 int late_right = exact_log2 (lsb2);
1986 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1988 /* Try to use zero extend */
1989 if (mask2 == ~(lsb2 - 1))
1993 for (width = 8; width <= 16; width += 8)
1995 /* Can we zero-extend right away? */
1996 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1999 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2000 if (cost < best_cost)
2011 /* ??? Could try to put zero extend into initial right shift,
2012 or even shift a bit left before the right shift. */
2013 /* Determine value of first part of left shift, to get to the
2014 zero extend cut-off point. */
2015 first = width - exact_log2 (lsb2) + right;
2016 if (first >= 0 && right + left - first >= 0)
2018 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2019 + ext_shift_insns[right + left - first];
2020 if (cost < best_cost)
2032 /* Try to use r0 AND pattern */
2033 for (i = 0; i <= 2; i++)
2037 if (! CONST_OK_FOR_K08 (mask >> i))
2039 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2040 if (cost < best_cost)
2045 best_len = cost - 1;
2048 /* Try to use a scratch register to hold the AND operand. */
2049 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2050 for (i = 0; i <= 2; i++)
2054 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2055 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2056 if (cost < best_cost)
2061 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2067 attrp[0] = best_right;
2068 attrp[1] = best_len;
2073 /* This is used in length attributes of the unnamed instructions
2074 corresponding to shl_and_kind return values of 1 and 2. */
2076 shl_and_length (insn)
2079 rtx set_src, left_rtx, mask_rtx;
2082 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2083 left_rtx = XEXP (XEXP (set_src, 0), 1);
2084 mask_rtx = XEXP (set_src, 1);
2085 shl_and_kind (left_rtx, mask_rtx, attributes);
2086 return attributes[1];
2089 /* This is used in length attribute of the and_shl_scratch instruction. */
2092 shl_and_scr_length (insn)
2095 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2096 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2097 rtx op = XEXP (set_src, 0);
2098 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2099 op = XEXP (XEXP (op, 0), 0);
2100 return len + shift_insns[INTVAL (XEXP (op, 1))];
2103 /* Generating rtl? */
2104 extern int rtx_equal_function_value_matters;
2106 /* Generate rtl for instructions for which shl_and_kind advised a particular
2107 method of generating them, i.e. returned zero. */
2110 gen_shl_and (dest, left_rtx, mask_rtx, source)
2111 rtx dest, left_rtx, mask_rtx, source;
2114 unsigned HOST_WIDE_INT mask;
2115 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2116 int right, total_shift;
2117 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2119 right = attributes[0];
2120 total_shift = INTVAL (left_rtx) + right;
2121 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2128 int first = attributes[2];
2133 emit_insn ((mask << right) <= 0xff
2134 ? gen_zero_extendqisi2(dest,
2135 gen_lowpart (QImode, source))
2136 : gen_zero_extendhisi2(dest,
2137 gen_lowpart (HImode, source)));
2141 emit_insn (gen_movsi (dest, source));
2145 operands[2] = GEN_INT (right);
2146 gen_shifty_hi_op (LSHIFTRT, operands);
2150 operands[2] = GEN_INT (first);
2151 gen_shifty_hi_op (ASHIFT, operands);
2152 total_shift -= first;
2156 emit_insn (mask <= 0xff
2157 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2158 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2159 if (total_shift > 0)
2161 operands[2] = GEN_INT (total_shift);
2162 gen_shifty_hi_op (ASHIFT, operands);
2167 shift_gen_fun = gen_shifty_op;
2169 /* If the topmost bit that matters is set, set the topmost bits
2170 that don't matter. This way, we might be able to get a shorter
2172 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2173 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2175 /* Don't expand fine-grained when combining, because that will
2176 make the pattern fail. */
2177 if (rtx_equal_function_value_matters
2178 || reload_in_progress || reload_completed)
2182 /* Cases 3 and 4 should be handled by this split
2183 only while combining */
2188 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2191 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2196 operands[2] = GEN_INT (total_shift);
2197 shift_gen_fun (ASHIFT, operands);
2204 if (kind != 4 && total_shift < 16)
2206 neg = -ext_shift_amounts[total_shift][1];
2208 neg -= ext_shift_amounts[total_shift][2];
2212 emit_insn (gen_and_shl_scratch (dest, source,
2215 GEN_INT (total_shift + neg),
2217 emit_insn (gen_movsi (dest, dest));
2224 /* Try to find a good way to implement the combiner pattern
2225 [(set (match_operand:SI 0 "register_operand" "=r")
2226 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2227 (match_operand:SI 2 "const_int_operand" "n")
2228 (match_operand:SI 3 "const_int_operand" "n")
2230 (clobber (reg:SI T_REG))]
2231 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2232 return 0 for simple left / right shift combination.
2233 return 1 for left shift / 8 bit sign extend / left shift.
2234 return 2 for left shift / 16 bit sign extend / left shift.
2235 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2236 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2237 return 5 for left shift / 16 bit sign extend / right shift
2238 return 6 for < 8 bit sign extend / left shift.
2239 return 7 for < 8 bit sign extend / left shift / single right shift.
2240 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2243 shl_sext_kind (left_rtx, size_rtx, costp)
2244 rtx left_rtx, size_rtx;
2247 int left, size, insize, ext;
2248 int cost = 0, best_cost;
2251 left = INTVAL (left_rtx);
2252 size = INTVAL (size_rtx);
2253 insize = size - left;
2256 /* Default to left / right shift. */
2258 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2261 /* 16 bit shift / sign extend / 16 bit shift */
2262 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2263 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2264 below, by alternative 3 or something even better. */
2265 if (cost < best_cost)
2271 /* Try a plain sign extend between two shifts. */
2272 for (ext = 16; ext >= insize; ext -= 8)
2276 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2277 if (cost < best_cost)
2279 kind = ext / (unsigned) 8;
2283 /* Check if we can do a sloppy shift with a final signed shift
2284 restoring the sign. */
2285 if (EXT_SHIFT_SIGNED (size - ext))
2286 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2287 /* If not, maybe it's still cheaper to do the second shift sloppy,
2288 and do a final sign extend? */
2289 else if (size <= 16)
2290 cost = ext_shift_insns[ext - insize] + 1
2291 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2294 if (cost < best_cost)
2296 kind = ext / (unsigned) 8 + 2;
2300 /* Check if we can sign extend in r0 */
2303 cost = 3 + shift_insns[left];
2304 if (cost < best_cost)
2309 /* Try the same with a final signed shift. */
2312 cost = 3 + ext_shift_insns[left + 1] + 1;
2313 if (cost < best_cost)
2322 /* Try to use a dynamic shift. */
2323 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2324 if (cost < best_cost)
2335 /* Function to be used in the length attribute of the instructions
2336 implementing this pattern. */
2339 shl_sext_length (insn)
2342 rtx set_src, left_rtx, size_rtx;
2345 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2346 left_rtx = XEXP (XEXP (set_src, 0), 1);
2347 size_rtx = XEXP (set_src, 1);
2348 shl_sext_kind (left_rtx, size_rtx, &cost);
2352 /* Generate rtl for this pattern */
2355 gen_shl_sext (dest, left_rtx, size_rtx, source)
2356 rtx dest, left_rtx, size_rtx, source;
2359 int left, size, insize, cost;
2362 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2363 left = INTVAL (left_rtx);
2364 size = INTVAL (size_rtx);
2365 insize = size - left;
2373 int ext = kind & 1 ? 8 : 16;
2374 int shift2 = size - ext;
2376 /* Don't expand fine-grained when combining, because that will
2377 make the pattern fail. */
2378 if (! rtx_equal_function_value_matters
2379 && ! reload_in_progress && ! reload_completed)
2381 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2382 emit_insn (gen_movsi (dest, source));
2386 emit_insn (gen_movsi (dest, source));
2390 operands[2] = GEN_INT (ext - insize);
2391 gen_shifty_hi_op (ASHIFT, operands);
2394 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2395 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2400 operands[2] = GEN_INT (shift2);
2401 gen_shifty_op (ASHIFT, operands);
2408 if (EXT_SHIFT_SIGNED (shift2))
2410 operands[2] = GEN_INT (shift2 + 1);
2411 gen_shifty_op (ASHIFT, operands);
2412 operands[2] = GEN_INT (1);
2413 gen_shifty_op (ASHIFTRT, operands);
2416 operands[2] = GEN_INT (shift2);
2417 gen_shifty_hi_op (ASHIFT, operands);
2421 operands[2] = GEN_INT (-shift2);
2422 gen_shifty_hi_op (LSHIFTRT, operands);
2424 emit_insn (size <= 8
2425 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2426 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2433 if (! rtx_equal_function_value_matters
2434 && ! reload_in_progress && ! reload_completed)
2435 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2439 operands[2] = GEN_INT (16 - insize);
2440 gen_shifty_hi_op (ASHIFT, operands);
2441 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2443 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2445 gen_ashift (ASHIFTRT, 1, dest);
2450 /* Don't expand fine-grained when combining, because that will
2451 make the pattern fail. */
2452 if (! rtx_equal_function_value_matters
2453 && ! reload_in_progress && ! reload_completed)
2455 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2456 emit_insn (gen_movsi (dest, source));
2459 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2460 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2461 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2463 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2464 gen_shifty_op (ASHIFT, operands);
2466 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2474 /* Prefix a symbol_ref name with "datalabel". */
2477 gen_datalabel_ref (sym)
2480 if (GET_CODE (sym) == LABEL_REF)
2481 return gen_rtx_CONST (GET_MODE (sym),
2482 gen_rtx_UNSPEC (GET_MODE (sym),
2486 if (GET_CODE (sym) != SYMBOL_REF)
2493 /* The SH cannot load a large constant into a register, constants have to
2494 come from a pc relative load. The reference of a pc relative load
2495 instruction must be less than 1k infront of the instruction. This
2496 means that we often have to dump a constant inside a function, and
2497 generate code to branch around it.
2499 It is important to minimize this, since the branches will slow things
2500 down and make things bigger.
2502 Worst case code looks like:
2520 We fix this by performing a scan before scheduling, which notices which
2521 instructions need to have their operands fetched from the constant table
2522 and builds the table.
2526 scan, find an instruction which needs a pcrel move. Look forward, find the
2527 last barrier which is within MAX_COUNT bytes of the requirement.
2528 If there isn't one, make one. Process all the instructions between
2529 the find and the barrier.
2531 In the above example, we can tell that L3 is within 1k of L1, so
2532 the first move can be shrunk from the 3 insn+constant sequence into
2533 just 1 insn, and the constant moved to L3 to make:
2544 Then the second move becomes the target for the shortening process. */
2548 rtx value; /* Value in table. */
2549 rtx label; /* Label of value. */
2550 rtx wend; /* End of window. */
2551 enum machine_mode mode; /* Mode of value. */
2553 /* True if this constant is accessed as part of a post-increment
2554 sequence. Note that HImode constants are never accessed in this way. */
2555 bool part_of_sequence_p;
2558 /* The maximum number of constants that can fit into one pool, since
2559 the pc relative range is 0...1020 bytes and constants are at least 4
2562 #define MAX_POOL_SIZE (1020/4)
2563 static pool_node pool_vector[MAX_POOL_SIZE];
2564 static int pool_size;
2565 static rtx pool_window_label;
2566 static int pool_window_last;
2568 /* ??? If we need a constant in HImode which is the truncated value of a
2569 constant we need in SImode, we could combine the two entries thus saving
2570 two bytes. Is this common enough to be worth the effort of implementing
2573 /* ??? This stuff should be done at the same time that we shorten branches.
2574 As it is now, we must assume that all branches are the maximum size, and
2575 this causes us to almost always output constant pools sooner than
2578 /* Add a constant to the pool and return its label. */
2581 add_constant (x, mode, last_value)
2583 enum machine_mode mode;
2587 rtx lab, new, ref, newref;
2589 /* First see if we've already got it. */
2590 for (i = 0; i < pool_size; i++)
2592 if (x->code == pool_vector[i].value->code
2593 && mode == pool_vector[i].mode)
2595 if (x->code == CODE_LABEL)
2597 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2600 if (rtx_equal_p (x, pool_vector[i].value))
2605 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2607 new = gen_label_rtx ();
2608 LABEL_REFS (new) = pool_vector[i].label;
2609 pool_vector[i].label = lab = new;
2611 if (lab && pool_window_label)
2613 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2614 ref = pool_vector[pool_window_last].wend;
2615 LABEL_NEXTREF (newref) = ref;
2616 pool_vector[pool_window_last].wend = newref;
2619 pool_window_label = new;
2620 pool_window_last = i;
2626 /* Need a new one. */
2627 pool_vector[pool_size].value = x;
2628 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2631 pool_vector[pool_size - 1].part_of_sequence_p = true;
2634 lab = gen_label_rtx ();
2635 pool_vector[pool_size].mode = mode;
2636 pool_vector[pool_size].label = lab;
2637 pool_vector[pool_size].wend = NULL_RTX;
2638 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2639 if (lab && pool_window_label)
2641 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2642 ref = pool_vector[pool_window_last].wend;
2643 LABEL_NEXTREF (newref) = ref;
2644 pool_vector[pool_window_last].wend = newref;
2647 pool_window_label = lab;
2648 pool_window_last = pool_size;
2653 /* Output the literal table. */
2664 /* Do two passes, first time dump out the HI sized constants. */
2666 for (i = 0; i < pool_size; i++)
2668 pool_node *p = &pool_vector[i];
2670 if (p->mode == HImode)
2674 scan = emit_insn_after (gen_align_2 (), scan);
2677 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2678 scan = emit_label_after (lab, scan);
2679 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2681 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2683 lab = XEXP (ref, 0);
2684 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2687 else if (p->mode == DFmode)
2693 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2695 rtx align_insn = NULL_RTX;
2697 scan = emit_label_after (gen_label_rtx (), scan);
2698 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2701 for (i = 0; i < pool_size; i++)
2703 pool_node *p = &pool_vector[i];
2711 if (align_insn && !p->part_of_sequence_p)
2713 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2714 emit_label_before (lab, align_insn);
2715 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2717 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2719 lab = XEXP (ref, 0);
2720 emit_insn_before (gen_consttable_window_end (lab),
2723 delete_insn (align_insn);
2724 align_insn = NULL_RTX;
2729 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2730 scan = emit_label_after (lab, scan);
2731 scan = emit_insn_after (gen_consttable_4 (p->value,
2733 need_align = ! need_align;
2739 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2744 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2745 scan = emit_label_after (lab, scan);
2746 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2754 if (p->mode != HImode)
2756 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2758 lab = XEXP (ref, 0);
2759 scan = emit_insn_after (gen_consttable_window_end (lab),
2768 for (i = 0; i < pool_size; i++)
2770 pool_node *p = &pool_vector[i];
2781 scan = emit_label_after (gen_label_rtx (), scan);
2782 scan = emit_insn_after (gen_align_4 (), scan);
2784 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2785 scan = emit_label_after (lab, scan);
2786 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2794 scan = emit_label_after (gen_label_rtx (), scan);
2795 scan = emit_insn_after (gen_align_4 (), scan);
2797 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2798 scan = emit_label_after (lab, scan);
2799 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2807 if (p->mode != HImode)
2809 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2811 lab = XEXP (ref, 0);
2812 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2817 scan = emit_insn_after (gen_consttable_end (), scan);
2818 scan = emit_barrier_after (scan);
2820 pool_window_label = NULL_RTX;
2821 pool_window_last = 0;
2824 /* Return nonzero if constant would be an ok source for a
2825 mov.w instead of a mov.l. */
2831 return (GET_CODE (src) == CONST_INT
2832 && INTVAL (src) >= -32768
2833 && INTVAL (src) <= 32767);
2836 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2838 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2839 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2840 need to fix it if the input value is CONST_OK_FOR_I08. */
2846 if (GET_CODE (insn) == INSN)
2848 rtx pat = PATTERN (insn);
2849 if (GET_CODE (pat) == PARALLEL)
2850 pat = XVECEXP (pat, 0, 0);
2851 if (GET_CODE (pat) == SET
2852 /* We can load any 8 bit value if we don't care what the high
2853 order bits end up as. */
2854 && GET_MODE (SET_DEST (pat)) != QImode
2855 && (CONSTANT_P (SET_SRC (pat))
2856 /* Match mova_const. */
2857 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2858 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2859 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2861 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2862 && (fp_zero_operand (SET_SRC (pat))
2863 || fp_one_operand (SET_SRC (pat)))
2864 /* ??? If this is a -m4 or -m4-single compilation, in general
2865 we don't know the current setting of fpscr, so disable fldi.
2866 There is an exception if this was a register-register move
2867 before reload - and hence it was ascertained that we have
2868 single precision setting - and in a post-reload optimization
2869 we changed this to do a constant load. In that case
2870 we don't have an r0 clobber, hence we must use fldi. */
2871 && (! TARGET_SH4 || TARGET_FMOVD
2872 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2874 && GET_CODE (SET_DEST (pat)) == REG
2875 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2876 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2877 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
2888 return (GET_CODE (insn) == INSN
2889 && GET_CODE (PATTERN (insn)) == SET
2890 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2891 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2892 /* Don't match mova_const. */
2893 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2896 /* Find the last barrier from insn FROM which is close enough to hold the
2897 constant pool. If we can't find one, then create one near the end of
2901 find_barrier (num_mova, mova, from)
2912 int leading_mova = num_mova;
2913 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2917 /* For HImode: range is 510, add 4 because pc counts from address of
2918 second instruction after this one, subtract 2 for the jump instruction
2919 that we may need to emit before the table, subtract 2 for the instruction
2920 that fills the jump delay slot (in very rare cases, reorg will take an
2921 instruction from after the constant pool or will leave the delay slot
2922 empty). This gives 510.
2923 For SImode: range is 1020, add 4 because pc counts from address of
2924 second instruction after this one, subtract 2 in case pc is 2 byte
2925 aligned, subtract 2 for the jump instruction that we may need to emit
2926 before the table, subtract 2 for the instruction that fills the jump
2927 delay slot. This gives 1018. */
2929 /* The branch will always be shortened now that the reference address for
2930 forward branches is the successor address, thus we need no longer make
2931 adjustments to the [sh]i_limit for -O0. */
2936 while (from && count_si < si_limit && count_hi < hi_limit)
2938 int inc = get_attr_length (from);
2941 if (GET_CODE (from) == CODE_LABEL)
2944 new_align = 1 << label_to_alignment (from);
2945 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2946 new_align = 1 << barrier_align (from);
2952 if (GET_CODE (from) == BARRIER)
2955 found_barrier = from;
2957 /* If we are at the end of the function, or in front of an alignment
2958 instruction, we need not insert an extra alignment. We prefer
2959 this kind of barrier. */
2960 if (barrier_align (from) > 2)
2961 good_barrier = from;
2964 if (broken_move (from))
2967 enum machine_mode mode;
2969 pat = PATTERN (from);
2970 if (GET_CODE (pat) == PARALLEL)
2971 pat = XVECEXP (pat, 0, 0);
2972 src = SET_SRC (pat);
2973 dst = SET_DEST (pat);
2974 mode = GET_MODE (dst);
2976 /* We must explicitly check the mode, because sometimes the
2977 front end will generate code to load unsigned constants into
2978 HImode targets without properly sign extending them. */
2980 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2983 /* We put the short constants before the long constants, so
2984 we must count the length of short constants in the range
2985 for the long constants. */
2986 /* ??? This isn't optimal, but is easy to do. */
2991 /* We dump DF/DI constants before SF/SI ones, because
2992 the limit is the same, but the alignment requirements
2993 are higher. We may waste up to 4 additional bytes
2994 for alignment, and the DF/DI constant may have
2995 another SF/SI constant placed before it. */
2996 if (TARGET_SHCOMPACT
2998 && (mode == DFmode || mode == DImode))
3003 while (si_align > 2 && found_si + si_align - 2 > count_si)
3005 if (found_si > count_si)
3006 count_si = found_si;
3007 found_si += GET_MODE_SIZE (mode);
3009 si_limit -= GET_MODE_SIZE (mode);
3012 /* See the code in machine_dependent_reorg, which has a similar if
3013 statement that generates a new mova insn in many cases. */
3014 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3024 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3026 if (found_si > count_si)
3027 count_si = found_si;
3029 else if (GET_CODE (from) == JUMP_INSN
3030 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3031 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3035 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3037 /* We have just passed the barrier in front of the
3038 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3039 the ADDR_DIFF_VEC is accessed as data, just like our pool
3040 constants, this is a good opportunity to accommodate what
3041 we have gathered so far.
3042 If we waited any longer, we could end up at a barrier in
3043 front of code, which gives worse cache usage for separated
3044 instruction / data caches. */
3045 good_barrier = found_barrier;
3050 rtx body = PATTERN (from);
3051 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3054 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3055 else if (GET_CODE (from) == JUMP_INSN
3057 && ! TARGET_SMALLCODE)
3063 if (new_align > si_align)
3065 si_limit -= (count_si - 1) & (new_align - si_align);
3066 si_align = new_align;
3068 count_si = (count_si + new_align - 1) & -new_align;
3073 if (new_align > hi_align)
3075 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3076 hi_align = new_align;
3078 count_hi = (count_hi + new_align - 1) & -new_align;
3080 from = NEXT_INSN (from);
3087 /* Try as we might, the leading mova is out of range. Change
3088 it into a load (which will become a pcload) and retry. */
3089 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3090 INSN_CODE (mova) = -1;
3091 return find_barrier (0, 0, mova);
3095 /* Insert the constant pool table before the mova instruction,
3096 to prevent the mova label reference from going out of range. */
3098 good_barrier = found_barrier = barrier_before_mova;
3104 if (good_barrier && next_real_insn (found_barrier))
3105 found_barrier = good_barrier;
3109 /* We didn't find a barrier in time to dump our stuff,
3110 so we'll make one. */
3111 rtx label = gen_label_rtx ();
3113 /* If we exceeded the range, then we must back up over the last
3114 instruction we looked at. Otherwise, we just need to undo the
3115 NEXT_INSN at the end of the loop. */
3116 if (count_hi > hi_limit || count_si > si_limit)
3117 from = PREV_INSN (PREV_INSN (from));
3119 from = PREV_INSN (from);
3121 /* Walk back to be just before any jump or label.
3122 Putting it before a label reduces the number of times the branch
3123 around the constant pool table will be hit. Putting it before
3124 a jump makes it more likely that the bra delay slot will be
3126 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3127 || GET_CODE (from) == CODE_LABEL)
3128 from = PREV_INSN (from);
3130 from = emit_jump_insn_after (gen_jump (label), from);
3131 JUMP_LABEL (from) = label;
3132 LABEL_NUSES (label) = 1;
3133 found_barrier = emit_barrier_after (from);
3134 emit_label_after (label, found_barrier);
3137 return found_barrier;
3140 /* If the instruction INSN is implemented by a special function, and we can
3141 positively find the register that is used to call the sfunc, and this
3142 register is not used anywhere else in this instruction - except as the
3143 destination of a set, return this register; else, return 0. */
3145 sfunc_uses_reg (insn)
3149 rtx pattern, part, reg_part, reg;
3151 if (GET_CODE (insn) != INSN)
3153 pattern = PATTERN (insn);
3154 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3157 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3159 part = XVECEXP (pattern, 0, i);
3160 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3165 reg = XEXP (reg_part, 0);
3166 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3168 part = XVECEXP (pattern, 0, i);
3169 if (part == reg_part || GET_CODE (part) == CLOBBER)
3171 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3172 && GET_CODE (SET_DEST (part)) == REG)
3173 ? SET_SRC (part) : part)))
3179 /* See if the only way in which INSN uses REG is by calling it, or by
3180 setting it while calling it. Set *SET to a SET rtx if the register
3184 noncall_uses_reg (reg, insn, set)
3193 reg2 = sfunc_uses_reg (insn);
3194 if (reg2 && REGNO (reg2) == REGNO (reg))
3196 pattern = single_set (insn);
3198 && GET_CODE (SET_DEST (pattern)) == REG
3199 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3203 if (GET_CODE (insn) != CALL_INSN)
3205 /* We don't use rtx_equal_p because we don't care if the mode is
3207 pattern = single_set (insn);
3209 && GET_CODE (SET_DEST (pattern)) == REG
3210 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3216 par = PATTERN (insn);
3217 if (GET_CODE (par) == PARALLEL)
3218 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3220 part = XVECEXP (par, 0, i);
3221 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3224 return reg_mentioned_p (reg, SET_SRC (pattern));
3230 pattern = PATTERN (insn);
3232 if (GET_CODE (pattern) == PARALLEL)
3236 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3237 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3239 pattern = XVECEXP (pattern, 0, 0);
3242 if (GET_CODE (pattern) == SET)
3244 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3246 /* We don't use rtx_equal_p, because we don't care if the
3247 mode is different. */
3248 if (GET_CODE (SET_DEST (pattern)) != REG
3249 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3255 pattern = SET_SRC (pattern);
3258 if (GET_CODE (pattern) != CALL
3259 || GET_CODE (XEXP (pattern, 0)) != MEM
3260 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3266 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3267 general registers. Bits 0..15 mean that the respective registers
3268 are used as inputs in the instruction. Bits 16..31 mean that the
3269 registers 0..15, respectively, are used as outputs, or are clobbered.
3270 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3272 regs_used (x, is_dest)
3281 code = GET_CODE (x);
3286 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3287 << (REGNO (x) + is_dest));
3291 rtx y = SUBREG_REG (x);
3293 if (GET_CODE (y) != REG)
3296 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3298 subreg_regno_offset (REGNO (y),
3301 GET_MODE (x)) + is_dest));
3305 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3307 /* If there was a return value, it must have been indicated with USE. */
3322 fmt = GET_RTX_FORMAT (code);
3324 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3329 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3330 used |= regs_used (XVECEXP (x, i, j), is_dest);
3332 else if (fmt[i] == 'e')
3333 used |= regs_used (XEXP (x, i), is_dest);
3338 /* Create an instruction that prevents redirection of a conditional branch
3339 to the destination of the JUMP with address ADDR.
3340 If the branch needs to be implemented as an indirect jump, try to find
3341 a scratch register for it.
3342 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3343 If any preceding insn that doesn't fit into a delay slot is good enough,
3344 pass 1. Pass 2 if a definite blocking insn is needed.
3345 -1 is used internally to avoid deep recursion.
3346 If a blocking instruction is made or recognized, return it. */
3349 gen_block_redirect (jump, addr, need_block)
3351 int addr, need_block;
3354 rtx prev = prev_nonnote_insn (jump);
3357 /* First, check if we already have an instruction that satisfies our need. */
3358 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3360 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3362 if (GET_CODE (PATTERN (prev)) == USE
3363 || GET_CODE (PATTERN (prev)) == CLOBBER
3364 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3366 else if ((need_block &= ~1) < 0)
3368 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3371 /* We can't use JUMP_LABEL here because it might be undefined
3372 when not optimizing. */
3373 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3374 /* If the branch is out of range, try to find a scratch register for it. */
3376 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3380 /* Don't look for the stack pointer as a scratch register,
3381 it would cause trouble if an interrupt occurred. */
3382 unsigned try = 0x7fff, used;
3383 int jump_left = flag_expensive_optimizations + 1;
3385 /* It is likely that the most recent eligible instruction is wanted for
3386 the delay slot. Therefore, find out which registers it uses, and
3387 try to avoid using them. */
3389 for (scan = jump; (scan = PREV_INSN (scan)); )
3393 if (INSN_DELETED_P (scan))
3395 code = GET_CODE (scan);
3396 if (code == CODE_LABEL || code == JUMP_INSN)
3399 && GET_CODE (PATTERN (scan)) != USE
3400 && GET_CODE (PATTERN (scan)) != CLOBBER
3401 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3403 try &= ~regs_used (PATTERN (scan), 0);
3407 for (used = dead = 0, scan = JUMP_LABEL (jump);
3408 (scan = NEXT_INSN (scan)); )
3412 if (INSN_DELETED_P (scan))
3414 code = GET_CODE (scan);
3415 if (GET_RTX_CLASS (code) == 'i')
3417 used |= regs_used (PATTERN (scan), 0);
3418 if (code == CALL_INSN)
3419 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3420 dead |= (used >> 16) & ~used;
3426 if (code == JUMP_INSN)
3428 if (jump_left-- && simplejump_p (scan))
3429 scan = JUMP_LABEL (scan);
3435 /* Mask out the stack pointer again, in case it was
3436 the only 'free' register we have found. */
3439 /* If the immediate destination is still in range, check for possible
3440 threading with a jump beyond the delay slot insn.
3441 Don't check if we are called recursively; the jump has been or will be
3442 checked in a different invocation then. */
3444 else if (optimize && need_block >= 0)
3446 rtx next = next_active_insn (next_active_insn (dest));
3447 if (next && GET_CODE (next) == JUMP_INSN
3448 && GET_CODE (PATTERN (next)) == SET
3449 && recog_memoized (next) == CODE_FOR_jump_compact)
3451 dest = JUMP_LABEL (next);
3453 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3455 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3461 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3463 /* It would be nice if we could convert the jump into an indirect
3464 jump / far branch right now, and thus exposing all constituent
3465 instructions to further optimization. However, reorg uses
3466 simplejump_p to determine if there is an unconditional jump where
3467 it should try to schedule instructions from the target of the
3468 branch; simplejump_p fails for indirect jumps even if they have
3470 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3471 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3473 /* ??? We would like this to have the scope of the jump, but that
3474 scope will change when a delay slot insn of an inner scope is added.
3475 Hence, after delay slot scheduling, we'll have to expect
3476 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3479 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3480 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3483 else if (need_block)
3484 /* We can't use JUMP_LABEL here because it might be undefined
3485 when not optimizing. */
3486 return emit_insn_before (gen_block_branch_redirect
3487 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3492 #define CONDJUMP_MIN -252
3493 #define CONDJUMP_MAX 262
3496 /* A label (to be placed) in front of the jump
3497 that jumps to our ultimate destination. */
3499 /* Where we are going to insert it if we cannot move the jump any farther,
3500 or the jump itself if we have picked up an existing jump. */
3502 /* The ultimate destination. */
3504 struct far_branch *prev;
3505 /* If the branch has already been created, its address;
3506 else the address of its first prospective user. */
3510 static void gen_far_branch PARAMS ((struct far_branch *));
3511 enum mdep_reorg_phase_e mdep_reorg_phase;
3514 struct far_branch *bp;
3516 rtx insn = bp->insert_place;
3518 rtx label = gen_label_rtx ();
3520 emit_label_after (label, insn);
3523 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3524 LABEL_NUSES (bp->far_label)++;
3527 jump = emit_jump_insn_after (gen_return (), insn);
3528 /* Emit a barrier so that reorg knows that any following instructions
3529 are not reachable via a fall-through path.
3530 But don't do this when not optimizing, since we wouldn't supress the
3531 alignment for the barrier then, and could end up with out-of-range
3532 pc-relative loads. */
3534 emit_barrier_after (jump);
3535 emit_label_after (bp->near_label, insn);
3536 JUMP_LABEL (jump) = bp->far_label;
3537 if (! invert_jump (insn, label, 1))
3540 (gen_stuff_delay_slot
3541 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3542 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3544 /* Prevent reorg from undoing our splits. */
3545 gen_block_redirect (jump, bp->address += 2, 2);
3548 /* Fix up ADDR_DIFF_VECs. */
3550 fixup_addr_diff_vecs (first)
3555 for (insn = first; insn; insn = NEXT_INSN (insn))
3557 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3559 if (GET_CODE (insn) != JUMP_INSN
3560 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3562 pat = PATTERN (insn);
3563 vec_lab = XEXP (XEXP (pat, 0), 0);
3565 /* Search the matching casesi_jump_2. */
3566 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3568 if (GET_CODE (prev) != JUMP_INSN)
3570 prevpat = PATTERN (prev);
3571 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3573 x = XVECEXP (prevpat, 0, 1);
3574 if (GET_CODE (x) != USE)
3577 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3581 /* Emit the reference label of the braf where it belongs, right after
3582 the casesi_jump_2 (i.e. braf). */
3583 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3584 emit_label_after (braf_label, prev);
3586 /* Fix up the ADDR_DIF_VEC to be relative
3587 to the reference address of the braf. */
3588 XEXP (XEXP (pat, 0), 0) = braf_label;
3592 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3593 a barrier. Return the base 2 logarithm of the desired alignment. */
3595 barrier_align (barrier_or_label)
3596 rtx barrier_or_label;
3598 rtx next = next_real_insn (barrier_or_label), pat, prev;
3599 int slot, credit, jump_to_next = 0;
3604 pat = PATTERN (next);
3606 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3609 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3610 /* This is a barrier in front of a constant table. */
3613 prev = prev_real_insn (barrier_or_label);
3614 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3616 pat = PATTERN (prev);
3617 /* If this is a very small table, we want to keep the alignment after
3618 the table to the minimum for proper code alignment. */
3619 return ((TARGET_SMALLCODE
3620 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3621 <= (unsigned)1 << (CACHE_LOG - 2)))
3622 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3625 if (TARGET_SMALLCODE)
3628 if (! TARGET_SH2 || ! optimize)
3629 return align_jumps_log;
3631 /* When fixing up pcloads, a constant table might be inserted just before
3632 the basic block that ends with the barrier. Thus, we can't trust the
3633 instruction lengths before that. */
3634 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3636 /* Check if there is an immediately preceding branch to the insn beyond
3637 the barrier. We must weight the cost of discarding useful information
3638 from the current cache line when executing this branch and there is
3639 an alignment, against that of fetching unneeded insn in front of the
3640 branch target when there is no alignment. */
3642 /* There are two delay_slot cases to consider. One is the simple case
3643 where the preceding branch is to the insn beyond the barrier (simple
3644 delay slot filling), and the other is where the preceding branch has
3645 a delay slot that is a duplicate of the insn after the barrier
3646 (fill_eager_delay_slots) and the branch is to the insn after the insn
3647 after the barrier. */
3649 /* PREV is presumed to be the JUMP_INSN for the barrier under
3650 investigation. Skip to the insn before it. */
3651 prev = prev_real_insn (prev);
3653 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3654 credit >= 0 && prev && GET_CODE (prev) == INSN;
3655 prev = prev_real_insn (prev))
3658 if (GET_CODE (PATTERN (prev)) == USE
3659 || GET_CODE (PATTERN (prev)) == CLOBBER)
3661 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3663 prev = XVECEXP (PATTERN (prev), 0, 1);
3664 if (INSN_UID (prev) == INSN_UID (next))
3666 /* Delay slot was filled with insn at jump target. */
3673 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3675 credit -= get_attr_length (prev);
3678 && GET_CODE (prev) == JUMP_INSN
3679 && JUMP_LABEL (prev))
3683 || next_real_insn (JUMP_LABEL (prev)) == next
3684 /* If relax_delay_slots() decides NEXT was redundant
3685 with some previous instruction, it will have
3686 redirected PREV's jump to the following insn. */
3687 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3688 /* There is no upper bound on redundant instructions
3689 that might have been skipped, but we must not put an
3690 alignment where none had been before. */
3691 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3693 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3694 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3695 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3697 rtx pat = PATTERN (prev);
3698 if (GET_CODE (pat) == PARALLEL)
3699 pat = XVECEXP (pat, 0, 0);
3700 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3706 return align_jumps_log;
3709 /* If we are inside a phony loop, almost any kind of label can turn up as the
3710 first one in the loop. Aligning a braf label causes incorrect switch
3711 destination addresses; we can detect braf labels because they are
3712 followed by a BARRIER.
3713 Applying loop alignment to small constant or switch tables is a waste
3714 of space, so we suppress this too. */
3716 sh_loop_align (label)
3722 next = next_nonnote_insn (next);
3723 while (next && GET_CODE (next) == CODE_LABEL);
3727 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3728 || recog_memoized (next) == CODE_FOR_consttable_2)
3731 return align_loops_log;
3734 /* Do a final pass over the function, just before delayed branch
3740 rtx first, insn, mova = NULL_RTX;
3742 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3743 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3745 first = get_insns ();
3747 /* We must split call insns before introducing `mova's. If we're
3748 optimizing, they'll have already been split. Otherwise, make
3749 sure we don't split them too late. */
3751 split_all_insns_noflow ();
3756 /* If relaxing, generate pseudo-ops to associate function calls with
3757 the symbols they call. It does no harm to not generate these
3758 pseudo-ops. However, when we can generate them, it enables to
3759 linker to potentially relax the jsr to a bsr, and eliminate the
3760 register load and, possibly, the constant pool entry. */
3762 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3765 /* Remove all REG_LABEL notes. We want to use them for our own
3766 purposes. This works because none of the remaining passes
3767 need to look at them.
3769 ??? But it may break in the future. We should use a machine
3770 dependent REG_NOTE, or some other approach entirely. */
3771 for (insn = first; insn; insn = NEXT_INSN (insn))
3777 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3778 remove_note (insn, note);
3782 for (insn = first; insn; insn = NEXT_INSN (insn))
3784 rtx pattern, reg, link, set, scan, dies, label;
3785 int rescan = 0, foundinsn = 0;
3787 if (GET_CODE (insn) == CALL_INSN)
3789 pattern = PATTERN (insn);
3791 if (GET_CODE (pattern) == PARALLEL)
3792 pattern = XVECEXP (pattern, 0, 0);
3793 if (GET_CODE (pattern) == SET)
3794 pattern = SET_SRC (pattern);
3796 if (GET_CODE (pattern) != CALL
3797 || GET_CODE (XEXP (pattern, 0)) != MEM)
3800 reg = XEXP (XEXP (pattern, 0), 0);
3804 reg = sfunc_uses_reg (insn);
3809 if (GET_CODE (reg) != REG)
3812 /* This is a function call via REG. If the only uses of REG
3813 between the time that it is set and the time that it dies
3814 are in function calls, then we can associate all the
3815 function calls with the setting of REG. */
3817 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3819 if (REG_NOTE_KIND (link) != 0)
3821 set = single_set (XEXP (link, 0));
3822 if (set && rtx_equal_p (reg, SET_DEST (set)))
3824 link = XEXP (link, 0);
3831 /* ??? Sometimes global register allocation will have
3832 deleted the insn pointed to by LOG_LINKS. Try
3833 scanning backward to find where the register is set. */
3834 for (scan = PREV_INSN (insn);
3835 scan && GET_CODE (scan) != CODE_LABEL;
3836 scan = PREV_INSN (scan))
3838 if (! INSN_P (scan))
3841 if (! reg_mentioned_p (reg, scan))
3844 if (noncall_uses_reg (reg, scan, &set))
3858 /* The register is set at LINK. */
3860 /* We can only optimize the function call if the register is
3861 being set to a symbol. In theory, we could sometimes
3862 optimize calls to a constant location, but the assembler
3863 and linker do not support that at present. */
3864 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3865 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3868 /* Scan forward from LINK to the place where REG dies, and
3869 make sure that the only insns which use REG are
3870 themselves function calls. */
3872 /* ??? This doesn't work for call targets that were allocated
3873 by reload, since there may not be a REG_DEAD note for the
3877 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3881 /* Don't try to trace forward past a CODE_LABEL if we haven't
3882 seen INSN yet. Ordinarily, we will only find the setting insn
3883 in LOG_LINKS if it is in the same basic block. However,
3884 cross-jumping can insert code labels in between the load and
3885 the call, and can result in situations where a single call
3886 insn may have two targets depending on where we came from. */
3888 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3891 if (! INSN_P (scan))
3894 /* Don't try to trace forward past a JUMP. To optimize
3895 safely, we would have to check that all the
3896 instructions at the jump destination did not use REG. */
3898 if (GET_CODE (scan) == JUMP_INSN)
3901 if (! reg_mentioned_p (reg, scan))
3904 if (noncall_uses_reg (reg, scan, &scanset))
3911 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3913 /* There is a function call to this register other
3914 than the one we are checking. If we optimize
3915 this call, we need to rescan again below. */
3919 /* ??? We shouldn't have to worry about SCANSET here.
3920 We should just be able to check for a REG_DEAD note
3921 on a function call. However, the REG_DEAD notes are
3922 apparently not dependable around libcalls; c-torture
3923 execute/920501-2 is a test case. If SCANSET is set,
3924 then this insn sets the register, so it must have
3925 died earlier. Unfortunately, this will only handle
3926 the cases in which the register is, in fact, set in a
3929 /* ??? We shouldn't have to use FOUNDINSN here.
3930 However, the LOG_LINKS fields are apparently not
3931 entirely reliable around libcalls;
3932 newlib/libm/math/e_pow.c is a test case. Sometimes
3933 an insn will appear in LOG_LINKS even though it is
3934 not the most recent insn which sets the register. */
3938 || find_reg_note (scan, REG_DEAD, reg)))
3947 /* Either there was a branch, or some insn used REG
3948 other than as a function call address. */
3952 /* Create a code label, and put it in a REG_LABEL note on
3953 the insn which sets the register, and on each call insn
3954 which uses the register. In final_prescan_insn we look
3955 for the REG_LABEL notes, and output the appropriate label
3958 label = gen_label_rtx ();
3959 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3961 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3970 scan = NEXT_INSN (scan);
3972 && ((GET_CODE (scan) == CALL_INSN
3973 && reg_mentioned_p (reg, scan))
3974 || ((reg2 = sfunc_uses_reg (scan))
3975 && REGNO (reg2) == REGNO (reg))))
3977 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3979 while (scan != dies);
3985 fixup_addr_diff_vecs (first);
3989 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3990 shorten_branches (first);
3992 /* Scan the function looking for move instructions which have to be
3993 changed to pc-relative loads and insert the literal tables. */
3995 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3996 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4003 else if (GET_CODE (insn) == JUMP_INSN
4004 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4012 /* Some code might have been inserted between the mova and
4013 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4014 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4015 total += get_attr_length (scan);
4017 /* range of mova is 1020, add 4 because pc counts from address of
4018 second instruction after this one, subtract 2 in case pc is 2
4019 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4020 cancels out with alignment effects of the mova itself. */
4023 /* Change the mova into a load, and restart scanning
4024 there. broken_move will then return true for mova. */
4025 SET_SRC (PATTERN (mova))
4026 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4027 INSN_CODE (mova) = -1;
4031 if (broken_move (insn))
4034 /* Scan ahead looking for a barrier to stick the constant table
4036 rtx barrier = find_barrier (num_mova, mova, insn);
4037 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4039 if (num_mova && ! mova_p (mova))
4041 /* find_barrier had to change the first mova into a
4042 pcload; thus, we have to start with this new pcload. */
4046 /* Now find all the moves between the points and modify them. */
4047 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4049 if (GET_CODE (scan) == CODE_LABEL)
4051 if (broken_move (scan))
4053 rtx *patp = &PATTERN (scan), pat = *patp;
4057 enum machine_mode mode;
4059 if (GET_CODE (pat) == PARALLEL)
4060 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4061 src = SET_SRC (pat);
4062 dst = SET_DEST (pat);
4063 mode = GET_MODE (dst);
4065 if (mode == SImode && hi_const (src)
4066 && REGNO (dst) != FPUL_REG)
4071 while (GET_CODE (dst) == SUBREG)
4073 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4074 GET_MODE (SUBREG_REG (dst)),
4077 dst = SUBREG_REG (dst);
4079 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4082 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4084 /* This must be an insn that clobbers r0. */
4085 rtx clobber = XVECEXP (PATTERN (scan), 0,
4086 XVECLEN (PATTERN (scan), 0) - 1);
4088 if (GET_CODE (clobber) != CLOBBER
4089 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4093 && reg_set_between_p (r0_rtx, last_float_move, scan))
4097 && GET_MODE_SIZE (mode) != 4
4098 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4100 lab = add_constant (src, mode, last_float);
4102 emit_insn_before (gen_mova (lab), scan);
4105 /* There will be a REG_UNUSED note for r0 on
4106 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4107 lest reorg:mark_target_live_regs will not
4108 consider r0 to be used, and we end up with delay
4109 slot insn in front of SCAN that clobbers r0. */
4111 = find_regno_note (last_float_move, REG_UNUSED, 0);
4113 /* If we are not optimizing, then there may not be
4116 PUT_MODE (note, REG_INC);
4118 *last_float_addr = r0_inc_rtx;
4120 last_float_move = scan;
4122 newsrc = gen_rtx (MEM, mode,
4123 (((TARGET_SH4 && ! TARGET_FMOVD)
4124 || REGNO (dst) == FPUL_REG)
4127 last_float_addr = &XEXP (newsrc, 0);
4129 /* Remove the clobber of r0. */
4130 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4131 RTX_UNCHANGING_P (newsrc) = 1;
4133 /* This is a mova needing a label. Create it. */
4134 else if (GET_CODE (src) == UNSPEC
4135 && XINT (src, 1) == UNSPEC_MOVA
4136 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4138 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4139 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4140 newsrc = gen_rtx_UNSPEC (SImode,
4141 gen_rtvec (1, newsrc),
4146 lab = add_constant (src, mode, 0);
4147 newsrc = gen_rtx_MEM (mode,
4148 gen_rtx_LABEL_REF (VOIDmode, lab));
4149 RTX_UNCHANGING_P (newsrc) = 1;
4151 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4152 INSN_CODE (scan) = -1;
4155 dump_table (barrier);
4160 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4161 INSN_ADDRESSES_FREE ();
4162 split_branches (first);
4164 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4165 also has an effect on the register that holds the address of the sfunc.
4166 Insert an extra dummy insn in front of each sfunc that pretends to
4167 use this register. */
4168 if (flag_delayed_branch)
4170 for (insn = first; insn; insn = NEXT_INSN (insn))
4172 rtx reg = sfunc_uses_reg (insn);
4176 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4180 /* fpscr is not actually a user variable, but we pretend it is for the
4181 sake of the previous optimization passes, since we want it handled like
4182 one. However, we don't have any debugging information for it, so turn
4183 it into a non-user variable now. */
4185 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4187 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4191 get_dest_uid (label, max_uid)
4195 rtx dest = next_real_insn (label);
4198 /* This can happen for an undefined label. */
4200 dest_uid = INSN_UID (dest);
4201 /* If this is a newly created branch redirection blocking instruction,
4202 we cannot index the branch_uid or insn_addresses arrays with its
4203 uid. But then, we won't need to, because the actual destination is
4204 the following branch. */
4205 while (dest_uid >= max_uid)
4207 dest = NEXT_INSN (dest);
4208 dest_uid = INSN_UID (dest);
4210 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4215 /* Split condbranches that are out of range. Also add clobbers for
4216 scratch registers that are needed in far jumps.
4217 We do this before delay slot scheduling, so that it can take our
4218 newly created instructions into account. It also allows us to
4219 find branches with common targets more easily. */
4222 split_branches (first)
4226 struct far_branch **uid_branch, *far_branch_list = 0;
4227 int max_uid = get_max_uid ();
4229 /* Find out which branches are out of range. */
4230 shorten_branches (first);
4232 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4233 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4235 for (insn = first; insn; insn = NEXT_INSN (insn))
4236 if (! INSN_P (insn))
4238 else if (INSN_DELETED_P (insn))
4240 /* Shorten_branches would split this instruction again,
4241 so transform it into a note. */
4242 PUT_CODE (insn, NOTE);
4243 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4244 NOTE_SOURCE_FILE (insn) = 0;
4246 else if (GET_CODE (insn) == JUMP_INSN
4247 /* Don't mess with ADDR_DIFF_VEC */
4248 && (GET_CODE (PATTERN (insn)) == SET
4249 || GET_CODE (PATTERN (insn)) == RETURN))
4251 enum attr_type type = get_attr_type (insn);
4252 if (type == TYPE_CBRANCH)
4256 if (get_attr_length (insn) > 4)
4258 rtx src = SET_SRC (PATTERN (insn));
4259 rtx olabel = XEXP (XEXP (src, 1), 0);
4260 int addr = INSN_ADDRESSES (INSN_UID (insn));
4262 int dest_uid = get_dest_uid (olabel, max_uid);
4263 struct far_branch *bp = uid_branch[dest_uid];
4265 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4266 the label if the LABEL_NUSES count drops to zero. There is
4267 always a jump_optimize pass that sets these values, but it
4268 proceeds to delete unreferenced code, and then if not
4269 optimizing, to un-delete the deleted instructions, thus
4270 leaving labels with too low uses counts. */
4273 JUMP_LABEL (insn) = olabel;
4274 LABEL_NUSES (olabel)++;
4278 bp = (struct far_branch *) alloca (sizeof *bp);
4279 uid_branch[dest_uid] = bp;
4280 bp->prev = far_branch_list;
4281 far_branch_list = bp;
4283 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4284 LABEL_NUSES (bp->far_label)++;
4288 label = bp->near_label;
4289 if (! label && bp->address - addr >= CONDJUMP_MIN)
4291 rtx block = bp->insert_place;
4293 if (GET_CODE (PATTERN (block)) == RETURN)
4294 block = PREV_INSN (block);
4296 block = gen_block_redirect (block,
4298 label = emit_label_after (gen_label_rtx (),
4300 bp->near_label = label;
4302 else if (label && ! NEXT_INSN (label))
4304 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4305 bp->insert_place = insn;
4307 gen_far_branch (bp);
4311 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4313 bp->near_label = label = gen_label_rtx ();
4314 bp->insert_place = insn;
4317 if (! redirect_jump (insn, label, 1))
4322 /* get_attr_length (insn) == 2 */
4323 /* Check if we have a pattern where reorg wants to redirect
4324 the branch to a label from an unconditional branch that
4326 /* We can't use JUMP_LABEL here because it might be undefined
4327 when not optimizing. */
4328 /* A syntax error might cause beyond to be NULL_RTX. */
4330 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4334 && (GET_CODE (beyond) == JUMP_INSN
4335 || ((beyond = next_active_insn (beyond))
4336 && GET_CODE (beyond) == JUMP_INSN))
4337 && GET_CODE (PATTERN (beyond)) == SET
4338 && recog_memoized (beyond) == CODE_FOR_jump_compact
4340 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4341 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4343 gen_block_redirect (beyond,
4344 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4347 next = next_active_insn (insn);
4349 if ((GET_CODE (next) == JUMP_INSN
4350 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4351 && GET_CODE (PATTERN (next)) == SET
4352 && recog_memoized (next) == CODE_FOR_jump_compact
4354 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4355 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4357 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4359 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4361 int addr = INSN_ADDRESSES (INSN_UID (insn));
4364 struct far_branch *bp;
4366 if (type == TYPE_JUMP)
4368 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4369 dest_uid = get_dest_uid (far_label, max_uid);
4372 /* Parse errors can lead to labels outside
4374 if (! NEXT_INSN (far_label))
4379 JUMP_LABEL (insn) = far_label;
4380 LABEL_NUSES (far_label)++;
4382 redirect_jump (insn, NULL_RTX, 1);
4386 bp = uid_branch[dest_uid];
4389 bp = (struct far_branch *) alloca (sizeof *bp);
4390 uid_branch[dest_uid] = bp;
4391 bp->prev = far_branch_list;
4392 far_branch_list = bp;
4394 bp->far_label = far_label;
4396 LABEL_NUSES (far_label)++;
4398 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4399 if (addr - bp->address <= CONDJUMP_MAX)
4400 emit_label_after (bp->near_label, PREV_INSN (insn));
4403 gen_far_branch (bp);
4409 bp->insert_place = insn;
4411 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4413 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4416 /* Generate all pending far branches,
4417 and free our references to the far labels. */
4418 while (far_branch_list)
4420 if (far_branch_list->near_label
4421 && ! NEXT_INSN (far_branch_list->near_label))
4422 gen_far_branch (far_branch_list);
4424 && far_branch_list->far_label
4425 && ! --LABEL_NUSES (far_branch_list->far_label))
4426 delete_insn (far_branch_list->far_label);
4427 far_branch_list = far_branch_list->prev;
4430 /* Instruction length information is no longer valid due to the new
4431 instructions that have been generated. */
4432 init_insn_lengths ();
4435 /* Dump out instruction addresses, which is useful for debugging the
4436 constant pool table stuff.
4438 If relaxing, output the label and pseudo-ops used to link together
4439 calls and the instruction which set the registers. */
4441 /* ??? The addresses printed by this routine for insns are nonsense for
4442 insns which are inside of a sequence where none of the inner insns have
4443 variable length. This is because the second pass of shorten_branches
4444 does not bother to update them. */
4447 final_prescan_insn (insn, opvec, noperands)
4449 rtx *opvec ATTRIBUTE_UNUSED;
4450 int noperands ATTRIBUTE_UNUSED;
4452 if (TARGET_DUMPISIZE)
4453 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4459 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4464 pattern = PATTERN (insn);
4465 if (GET_CODE (pattern) == PARALLEL)
4466 pattern = XVECEXP (pattern, 0, 0);
4467 if (GET_CODE (pattern) == CALL
4468 || (GET_CODE (pattern) == SET
4469 && (GET_CODE (SET_SRC (pattern)) == CALL
4470 || get_attr_type (insn) == TYPE_SFUNC)))
4471 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4472 CODE_LABEL_NUMBER (XEXP (note, 0)));
4473 else if (GET_CODE (pattern) == SET)
4474 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4475 CODE_LABEL_NUMBER (XEXP (note, 0)));
4482 /* Dump out any constants accumulated in the final pass. These will
4486 output_jump_label_table ()
4492 fprintf (asm_out_file, "\t.align 2\n");
4493 for (i = 0; i < pool_size; i++)
4495 pool_node *p = &pool_vector[i];
4497 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4498 CODE_LABEL_NUMBER (p->label));
4499 output_asm_insn (".long %O0", &p->value);
4507 /* A full frame looks like:
4511 [ if current_function_anonymous_args
4524 local-0 <- fp points here. */
4526 /* Number of bytes pushed for anonymous args, used to pass information
4527 between expand_prologue and expand_epilogue. */
4529 static int extra_push;
4531 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4532 to be adjusted, and TEMP, if nonnegative, holds the register number
4533 of a general register that we may clobber. */
4536 output_stack_adjust (size, reg, temp, emit_fn)
4540 rtx (*emit_fn) PARAMS ((rtx));
4544 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4549 if (CONST_OK_FOR_ADD (size))
4550 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4551 /* Try to do it with two partial adjustments; however, we must make
4552 sure that the stack is properly aligned at all times, in case
4553 an interrupt occurs between the two partial adjustments. */
4554 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4555 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4557 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4558 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4565 /* If TEMP is invalid, we could temporarily save a general
4566 register to MACL. However, there is currently no need
4567 to handle this case, so just abort when we see it. */
4570 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4572 /* If SIZE is negative, subtract the positive value.
4573 This sometimes allows a constant pool entry to be shared
4574 between prologue and epilogue code. */
4577 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4578 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4582 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4583 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4585 if (emit_fn == frame_insn)
4587 = (gen_rtx_EXPR_LIST
4588 (REG_FRAME_RELATED_EXPR,
4589 gen_rtx_SET (VOIDmode, reg,
4590 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4601 RTX_FRAME_RELATED_P (x) = 1;
4605 /* Output RTL to push register RN onto the stack. */
4613 x = gen_push_fpul ();
4614 else if (rn == FPSCR_REG)
4615 x = gen_push_fpscr ();
4616 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4617 && FP_OR_XD_REGISTER_P (rn))
4619 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4621 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4623 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4624 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4626 x = gen_push (gen_rtx_REG (SImode, rn));
4630 = gen_rtx_EXPR_LIST (REG_INC,
4631 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4635 /* Output RTL to pop register RN from the stack. */
4643 x = gen_pop_fpul ();
4644 else if (rn == FPSCR_REG)
4645 x = gen_pop_fpscr ();
4646 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4647 && FP_OR_XD_REGISTER_P (rn))
4649 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4651 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4653 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4654 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4656 x = gen_pop (gen_rtx_REG (SImode, rn));
4660 = gen_rtx_EXPR_LIST (REG_INC,
4661 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4664 /* Generate code to push the regs specified in the mask. */
4667 push_regs (mask, interrupt_handler)
4669 int interrupt_handler;
4674 /* Push PR last; this gives better latencies after the prologue, and
4675 candidates for the return delay slot when there are no general
4676 registers pushed. */
4677 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4679 /* If this is an interrupt handler, and the SZ bit varies,
4680 and we have to push any floating point register, we need
4681 to switch to the correct precision first. */
4682 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4683 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4685 HARD_REG_SET unsaved;
4688 COMPL_HARD_REG_SET(unsaved, *mask);
4689 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4693 && (i != FPSCR_REG || ! skip_fpscr)
4694 && TEST_HARD_REG_BIT (*mask, i))
4697 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4701 /* Work out the registers which need to be saved, both as a mask and a
4702 count of saved words. Return the count.
4704 If doing a pragma interrupt function, then push all regs used by the
4705 function, and if we call another function (we can tell by looking at PR),
4706 make sure that all the regs it clobbers are safe too. */
4709 calc_live_regs (live_regs_mask)
4710 HARD_REG_SET *live_regs_mask;
4714 int interrupt_handler;
4717 interrupt_handler = sh_cfun_interrupt_handler_p ();
4719 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4720 CLEAR_HARD_REG_SET (*live_regs_mask);
4721 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4722 && regs_ever_live[FPSCR_REG])
4723 target_flags &= ~FPU_SINGLE_BIT;
4724 /* If we can save a lot of saves by switching to double mode, do that. */
4725 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4726 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4727 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4728 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4731 target_flags &= ~FPU_SINGLE_BIT;
4734 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4735 knows how to use it. That means the pseudo originally allocated for
4736 the initial value can become the PR_MEDIA_REG hard register, as seen for
4737 execute/20010122-1.c:test9. */
4739 pr_live = regs_ever_live[PR_MEDIA_REG];
4742 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4743 pr_live = (pr_initial
4744 ? (GET_CODE (pr_initial) != REG
4745 || REGNO (pr_initial) != (PR_REG))
4746 : regs_ever_live[PR_REG]);
4748 /* Force PR to be live if the prologue has to call the SHmedia
4749 argument decoder or register saver. */
4750 if (TARGET_SHCOMPACT
4751 && ((current_function_args_info.call_cookie
4752 & ~ CALL_COOKIE_RET_TRAMP (1))
4753 || current_function_has_nonlocal_label))
4755 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4757 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4759 : (interrupt_handler && ! pragma_trapa)
4760 ? (/* Need to save all the regs ever live. */
4761 (regs_ever_live[reg]
4762 || (call_used_regs[reg]
4763 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4765 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4766 && reg != RETURN_ADDRESS_POINTER_REGNUM
4767 && reg != T_REG && reg != GBR_REG
4768 /* Push fpscr only on targets which have FPU */
4769 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4770 : (/* Only push those regs which are used and need to be saved. */
4773 && current_function_args_info.call_cookie
4774 && reg == PIC_OFFSET_TABLE_REGNUM)
4775 || (regs_ever_live[reg] && ! call_used_regs[reg])
4776 || (current_function_calls_eh_return
4777 && (reg == EH_RETURN_DATA_REGNO (0)
4778 || reg == EH_RETURN_DATA_REGNO (1)
4779 || reg == EH_RETURN_DATA_REGNO (2)
4780 || reg == EH_RETURN_DATA_REGNO (3)))))
4782 SET_HARD_REG_BIT (*live_regs_mask, reg);
4783 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4785 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4786 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4788 if (FP_REGISTER_P (reg))
4790 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4792 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4793 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4796 else if (XD_REGISTER_P (reg))
4798 /* Must switch to double mode to access these registers. */
4799 target_flags &= ~FPU_SINGLE_BIT;
4808 /* Code to generate prologue and epilogue sequences */
4810 /* PUSHED is the number of bytes that are being pushed on the
4811 stack for register saves. Return the frame size, padded
4812 appropriately so that the stack stays properly aligned. */
4813 static HOST_WIDE_INT
4814 rounded_frame_size (pushed)
4817 HOST_WIDE_INT size = get_frame_size ();
4818 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4820 return ((size + pushed + align - 1) & -align) - pushed;
4823 /* Choose a call-clobbered target-branch register that remains
4824 unchanged along the whole function. We set it up as the return
4825 value in the prologue. */
4827 sh_media_register_for_return ()
4832 if (! current_function_is_leaf)
4835 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4837 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4838 if (call_used_regs[regno] && ! regs_ever_live[regno])
4845 sh_expand_prologue ()
4847 HARD_REG_SET live_regs_mask;
4850 int save_flags = target_flags;
4852 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4854 /* We have pretend args if we had an object sent partially in registers
4855 and partially on the stack, e.g. a large structure. */
4856 output_stack_adjust (-current_function_pretend_args_size
4857 - current_function_args_info.stack_regs * 8,
4858 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4862 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4863 /* We're going to use the PIC register to load the address of the
4864 incoming-argument decoder and/or of the return trampoline from
4865 the GOT, so make sure the PIC register is preserved and
4867 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4869 if (TARGET_SHCOMPACT
4870 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4874 /* First, make all registers with incoming arguments that will
4875 be pushed onto the stack live, so that register renaming
4876 doesn't overwrite them. */
4877 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4878 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4879 >= NPARM_REGS (SImode) - reg)
4880 for (; reg < NPARM_REGS (SImode); reg++)
4881 emit_insn (gen_shcompact_preserve_incoming_args
4882 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4883 else if (CALL_COOKIE_INT_REG_GET
4884 (current_function_args_info.call_cookie, reg) == 1)
4885 emit_insn (gen_shcompact_preserve_incoming_args
4886 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4888 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4890 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4891 GEN_INT (current_function_args_info.call_cookie));
4892 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4893 gen_rtx_REG (SImode, R0_REG));
4895 else if (TARGET_SHMEDIA)
4897 int tr = sh_media_register_for_return ();
4901 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4902 gen_rtx_REG (DImode, PR_MEDIA_REG));
4904 /* If this function only exits with sibcalls, this copy
4905 will be flagged as dead. */
4906 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4912 /* Emit the code for SETUP_VARARGS. */
4913 if (current_function_stdarg)
4915 /* This is not used by the SH2E calling convention */
4916 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4918 /* Push arg regs as if they'd been provided by caller in stack. */
4919 for (i = 0; i < NPARM_REGS(SImode); i++)
4921 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4924 if (i >= (NPARM_REGS(SImode)
4925 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4929 RTX_FRAME_RELATED_P (insn) = 0;
4935 /* If we're supposed to switch stacks at function entry, do so now. */
4937 emit_insn (gen_sp_switch_1 ());
4939 d = calc_live_regs (&live_regs_mask);
4940 /* ??? Maybe we could save some switching if we can move a mode switch
4941 that already happens to be at the function start into the prologue. */
4942 if (target_flags != save_flags && ! current_function_interrupt)
4943 emit_insn (gen_toggle_sz ());
4950 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4951 int offset_in_r0 = -1;
4954 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4955 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4956 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4958 offset = d + d_rounding;
4959 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4961 /* We loop twice: first, we save 8-byte aligned registers in the
4962 higher addresses, that are known to be aligned. Then, we
4963 proceed to saving 32-bit registers that don't need 8-byte
4965 /* Note that if you change this code in a way that affects where
4966 the return register is saved, you have to update not only
4967 sh_expand_epilogue, but also sh_set_return_address. */
4968 for (align = 1; align >= 0; align--)
4969 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4970 if (TEST_HARD_REG_BIT (live_regs_mask, i))
4972 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4974 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4976 if (mode == SFmode && (i % 2) == 1
4977 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4978 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
4985 /* If we're doing the aligned pass and this is not aligned,
4986 or we're doing the unaligned pass and this is aligned,
4988 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4992 offset -= GET_MODE_SIZE (mode);
4994 reg_rtx = gen_rtx_REG (mode, reg);
4996 mem_rtx = gen_rtx_MEM (mode,
4997 gen_rtx_PLUS (Pmode,
5001 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5007 if (HAVE_PRE_DECREMENT
5008 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5009 || mem_rtx == NULL_RTX
5010 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5012 pre_dec = gen_rtx_MEM (mode,
5013 gen_rtx_PRE_DEC (Pmode, r0));
5015 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5024 offset += GET_MODE_SIZE (mode);
5028 if (mem_rtx != NULL_RTX)
5031 if (offset_in_r0 == -1)
5033 emit_move_insn (r0, GEN_INT (offset));
5034 offset_in_r0 = offset;
5036 else if (offset != offset_in_r0)
5041 GEN_INT (offset - offset_in_r0)));
5042 offset_in_r0 += offset - offset_in_r0;
5045 if (pre_dec != NULL_RTX)
5051 (Pmode, r0, stack_pointer_rtx));
5055 offset -= GET_MODE_SIZE (mode);
5056 offset_in_r0 -= GET_MODE_SIZE (mode);
5061 mem_rtx = gen_rtx_MEM (mode, r0);
5063 mem_rtx = gen_rtx_MEM (mode,
5064 gen_rtx_PLUS (Pmode,
5068 /* We must not use an r0-based address for target-branch
5069 registers or for special registers without pre-dec
5070 memory addresses, since we store their values in r0
5072 if (TARGET_REGISTER_P (i)
5073 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5074 && mem_rtx != pre_dec))
5078 if (TARGET_REGISTER_P (i)
5079 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5080 && mem_rtx != pre_dec))
5082 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5084 emit_move_insn (r0mode, reg_rtx);
5092 emit_move_insn (mem_rtx, reg_rtx);
5095 if (offset != d_rounding)
5099 push_regs (&live_regs_mask, current_function_interrupt);
5101 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5103 rtx insn = get_last_insn ();
5104 rtx last = emit_insn (gen_GOTaddr2picreg ());
5106 /* Mark these insns as possibly dead. Sometimes, flow2 may
5107 delete all uses of the PIC register. In this case, let it
5108 delete the initialization too. */
5111 insn = NEXT_INSN (insn);
5113 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5117 while (insn != last);
5120 if (SHMEDIA_REGS_STACK_ADJUST ())
5122 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5123 function_symbol (TARGET_FPU_ANY
5124 ? "__GCC_push_shmedia_regs"
5125 : "__GCC_push_shmedia_regs_nofpu"));
5126 /* This must NOT go through the PLT, otherwise mach and macl
5127 may be clobbered. */
5128 emit_insn (gen_shmedia_save_restore_regs_compact
5129 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5132 if (target_flags != save_flags && ! current_function_interrupt)
5134 rtx insn = emit_insn (gen_toggle_sz ());
5136 /* If we're lucky, a mode switch in the function body will
5137 overwrite fpscr, turning this insn dead. Tell flow this
5138 insn is ok to delete. */
5139 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5144 target_flags = save_flags;
5146 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5147 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5149 if (frame_pointer_needed)
5150 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5152 if (TARGET_SHCOMPACT
5153 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5155 /* This must NOT go through the PLT, otherwise mach and macl
5156 may be clobbered. */
5157 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5158 function_symbol ("__GCC_shcompact_incoming_args"));
5159 emit_insn (gen_shcompact_incoming_args ());
5164 sh_expand_epilogue ()
5166 HARD_REG_SET live_regs_mask;
5170 int save_flags = target_flags;
5172 int fpscr_deferred = 0;
5174 d = calc_live_regs (&live_regs_mask);
5176 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5177 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5178 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5180 frame_size = rounded_frame_size (d) - d_rounding;
5182 if (frame_pointer_needed)
5184 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5186 /* We must avoid moving the stack pointer adjustment past code
5187 which reads from the local frame, else an interrupt could
5188 occur after the SP adjustment and clobber data in the local
5190 emit_insn (gen_blockage ());
5191 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5193 else if (frame_size)
5195 /* We must avoid moving the stack pointer adjustment past code
5196 which reads from the local frame, else an interrupt could
5197 occur after the SP adjustment and clobber data in the local
5199 emit_insn (gen_blockage ());
5200 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5203 if (SHMEDIA_REGS_STACK_ADJUST ())
5205 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5206 function_symbol (TARGET_FPU_ANY
5207 ? "__GCC_pop_shmedia_regs"
5208 : "__GCC_pop_shmedia_regs_nofpu"));
5209 /* This must NOT go through the PLT, otherwise mach and macl
5210 may be clobbered. */
5211 emit_insn (gen_shmedia_save_restore_regs_compact
5212 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5215 /* Pop all the registers. */
5217 if (target_flags != save_flags && ! current_function_interrupt)
5218 emit_insn (gen_toggle_sz ());
5221 int offset = d_rounding;
5222 int offset_in_r0 = -1;
5225 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5226 int tmp_regno = R20_REG;
5228 /* We loop twice: first, we save 8-byte aligned registers in the
5229 higher addresses, that are known to be aligned. Then, we
5230 proceed to saving 32-bit registers that don't need 8-byte
5232 for (align = 0; align <= 1; align++)
5233 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5234 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5236 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5238 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5240 if (mode == SFmode && (i % 2) == 0
5241 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5242 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5248 /* If we're doing the aligned pass and this is not aligned,
5249 or we're doing the unaligned pass and this is aligned,
5251 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5255 reg_rtx = gen_rtx_REG (mode, reg);
5257 mem_rtx = gen_rtx_MEM (mode,
5258 gen_rtx_PLUS (Pmode,
5262 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5268 if (HAVE_POST_INCREMENT
5269 && (offset == offset_in_r0
5270 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5271 && mem_rtx == NULL_RTX)
5272 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5274 post_inc = gen_rtx_MEM (mode,
5275 gen_rtx_POST_INC (Pmode, r0));
5277 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5280 post_inc = NULL_RTX;
5289 if (mem_rtx != NULL_RTX)
5292 if (offset_in_r0 == -1)
5294 emit_move_insn (r0, GEN_INT (offset));
5295 offset_in_r0 = offset;
5297 else if (offset != offset_in_r0)
5302 GEN_INT (offset - offset_in_r0)));
5303 offset_in_r0 += offset - offset_in_r0;
5306 if (post_inc != NULL_RTX)
5312 (Pmode, r0, stack_pointer_rtx));
5318 offset_in_r0 += GET_MODE_SIZE (mode);
5321 mem_rtx = gen_rtx_MEM (mode, r0);
5323 mem_rtx = gen_rtx_MEM (mode,
5324 gen_rtx_PLUS (Pmode,
5328 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5329 && mem_rtx != post_inc)
5333 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5334 && mem_rtx != post_inc)
5336 insn = emit_move_insn (r0, mem_rtx);
5339 else if (TARGET_REGISTER_P (i))
5341 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5343 /* Give the scheduler a bit of freedom by using R20..R23
5344 in a round-robin fashion. Don't use R1 here because
5345 we want to use it for EH_RETURN_STACKADJ_RTX. */
5346 insn = emit_move_insn (tmp_reg, mem_rtx);
5348 if (++tmp_regno > R23_REG)
5349 tmp_regno = R20_REG;
5352 insn = emit_move_insn (reg_rtx, mem_rtx);
5354 offset += GET_MODE_SIZE (mode);
5357 if (offset != d + d_rounding)
5364 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5366 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5368 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5370 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5371 && hard_regs_intersect_p (&live_regs_mask,
5372 ®_class_contents[DF_REGS]))
5374 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5376 if (j == FIRST_FP_REG && fpscr_deferred)
5380 if (target_flags != save_flags && ! current_function_interrupt)
5381 emit_insn (gen_toggle_sz ());
5382 target_flags = save_flags;
5384 output_stack_adjust (extra_push + current_function_pretend_args_size
5386 + current_function_args_info.stack_regs * 8,
5387 stack_pointer_rtx, 7, emit_insn);
5389 if (current_function_calls_eh_return)
5390 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5391 EH_RETURN_STACKADJ_RTX));
5393 /* Switch back to the normal stack if necessary. */
5395 emit_insn (gen_sp_switch_2 ());
5397 /* Tell flow the insn that pops PR isn't dead. */
5398 /* PR_REG will never be live in SHmedia mode, and we don't need to
5399 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5400 by the return pattern. */
5401 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5402 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5405 static int sh_need_epilogue_known = 0;
5410 if (! sh_need_epilogue_known)
5415 sh_expand_epilogue ();
5416 epilogue = get_insns ();
5418 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5420 return sh_need_epilogue_known > 0;
5423 /* Emit code to change the current function's return address to RA.
5424 TEMP is available as a scratch register, if needed. */
5427 sh_set_return_address (ra, tmp)
5430 HARD_REG_SET live_regs_mask;
5433 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5436 d = calc_live_regs (&live_regs_mask);
5438 /* If pr_reg isn't life, we can set it (or the register given in
5439 sh_media_register_for_return) directly. */
5440 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5446 int rr_regno = sh_media_register_for_return ();
5451 rr = gen_rtx_REG (DImode, rr_regno);
5454 rr = gen_rtx_REG (SImode, pr_reg);
5456 emit_insn (GEN_MOV (rr, ra));
5457 /* Tell flow the register for return isn't dead. */
5458 emit_insn (gen_rtx_USE (VOIDmode, rr));
5468 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5469 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5470 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5474 /* We loop twice: first, we save 8-byte aligned registers in the
5475 higher addresses, that are known to be aligned. Then, we
5476 proceed to saving 32-bit registers that don't need 8-byte
5478 for (align = 0; align <= 1; align++)
5479 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5480 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5482 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5484 if (mode == SFmode && (i % 2) == 0
5485 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5486 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5492 /* If we're doing the aligned pass and this is not aligned,
5493 or we're doing the unaligned pass and this is aligned,
5495 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5502 offset += GET_MODE_SIZE (mode);
5505 /* We can't find pr register. */
5509 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5510 + SHMEDIA_REGS_STACK_ADJUST ());
5513 pr_offset = rounded_frame_size (d) - d_rounding;
5515 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5516 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5518 tmp = gen_rtx_MEM (Pmode, tmp);
5519 emit_insn (GEN_MOV (tmp, ra));
5522 /* Clear variables at function end. */
5525 sh_output_function_epilogue (file, size)
5526 FILE *file ATTRIBUTE_UNUSED;
5527 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5529 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5530 sh_need_epilogue_known = 0;
5531 sp_switch = NULL_RTX;
5535 sh_builtin_saveregs ()
5537 /* First unnamed integer register. */
5538 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5539 /* Number of integer registers we need to save. */
5540 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5541 /* First unnamed SFmode float reg */
5542 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5543 /* Number of SFmode float regs to save. */
5544 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5547 HOST_WIDE_INT alias_set;
5553 int pushregs = n_intregs;
5555 while (pushregs < NPARM_REGS (SImode) - 1
5556 && (CALL_COOKIE_INT_REG_GET
5557 (current_function_args_info.call_cookie,
5558 NPARM_REGS (SImode) - pushregs)
5561 current_function_args_info.call_cookie
5562 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5567 if (pushregs == NPARM_REGS (SImode))
5568 current_function_args_info.call_cookie
5569 |= (CALL_COOKIE_INT_REG (0, 1)
5570 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5572 current_function_args_info.call_cookie
5573 |= CALL_COOKIE_STACKSEQ (pushregs);
5575 current_function_pretend_args_size += 8 * n_intregs;
5577 if (TARGET_SHCOMPACT)
5581 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5583 error ("__builtin_saveregs not supported by this subtarget");
5590 /* Allocate block of memory for the regs. */
5591 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5592 Or can assign_stack_local accept a 0 SIZE argument? */
5593 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5596 regbuf = gen_rtx_MEM (BLKmode,
5597 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5598 else if (n_floatregs & 1)
5602 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5603 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5604 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5605 regbuf = change_address (regbuf, BLKmode, addr);
5608 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5609 alias_set = get_varargs_alias_set ();
5610 set_mem_alias_set (regbuf, alias_set);
5613 This is optimized to only save the regs that are necessary. Explicitly
5614 named args need not be saved. */
5616 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5617 adjust_address (regbuf, BLKmode,
5618 n_floatregs * UNITS_PER_WORD),
5622 /* Return the address of the regbuf. */
5623 return XEXP (regbuf, 0);
5626 This is optimized to only save the regs that are necessary. Explicitly
5627 named args need not be saved.
5628 We explicitly build a pointer to the buffer because it halves the insn
5629 count when not optimizing (otherwise the pointer is built for each reg
5631 We emit the moves in reverse order so that we can use predecrement. */
5633 fpregs = gen_reg_rtx (Pmode);
5634 emit_move_insn (fpregs, XEXP (regbuf, 0));
5635 emit_insn (gen_addsi3 (fpregs, fpregs,
5636 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5640 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5642 emit_insn (gen_addsi3 (fpregs, fpregs,
5643 GEN_INT (-2 * UNITS_PER_WORD)));
5644 mem = gen_rtx_MEM (DFmode, fpregs);
5645 set_mem_alias_set (mem, alias_set);
5646 emit_move_insn (mem,
5647 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5649 regno = first_floatreg;
5652 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5653 mem = gen_rtx_MEM (SFmode, fpregs);
5654 set_mem_alias_set (mem, alias_set);
5655 emit_move_insn (mem,
5656 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5657 - (TARGET_LITTLE_ENDIAN != 0)));
5661 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5665 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5666 mem = gen_rtx_MEM (SFmode, fpregs);
5667 set_mem_alias_set (mem, alias_set);
5668 emit_move_insn (mem,
5669 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5672 /* Return the address of the regbuf. */
5673 return XEXP (regbuf, 0);
5676 /* Define the `__builtin_va_list' type for the ABI. */
5681 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5684 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5685 return ptr_type_node;
5687 record = make_node (RECORD_TYPE);
5689 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5691 f_next_o_limit = build_decl (FIELD_DECL,
5692 get_identifier ("__va_next_o_limit"),
5694 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5696 f_next_fp_limit = build_decl (FIELD_DECL,
5697 get_identifier ("__va_next_fp_limit"),
5699 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5702 DECL_FIELD_CONTEXT (f_next_o) = record;
5703 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5704 DECL_FIELD_CONTEXT (f_next_fp) = record;
5705 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5706 DECL_FIELD_CONTEXT (f_next_stack) = record;
5708 TYPE_FIELDS (record) = f_next_o;
5709 TREE_CHAIN (f_next_o) = f_next_o_limit;
5710 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5711 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5712 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5714 layout_type (record);
5719 /* Implement `va_start' for varargs and stdarg. */
5722 sh_va_start (valist, nextarg)
5726 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5727 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5733 expand_builtin_saveregs ();
5734 std_expand_builtin_va_start (valist, nextarg);
5738 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5740 std_expand_builtin_va_start (valist, nextarg);
5744 f_next_o = TYPE_FIELDS (va_list_type_node);
5745 f_next_o_limit = TREE_CHAIN (f_next_o);
5746 f_next_fp = TREE_CHAIN (f_next_o_limit);
5747 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5748 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5750 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5751 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5752 valist, f_next_o_limit);
5753 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5754 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5755 valist, f_next_fp_limit);
5756 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5757 valist, f_next_stack);
5759 /* Call __builtin_saveregs. */
5760 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5761 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5762 TREE_SIDE_EFFECTS (t) = 1;
5763 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5765 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5770 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5771 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5772 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5773 TREE_SIDE_EFFECTS (t) = 1;
5774 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5776 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5777 TREE_SIDE_EFFECTS (t) = 1;
5778 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5780 nint = current_function_args_info.arg_count[SH_ARG_INT];
5785 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5786 build_int_2 (UNITS_PER_WORD * nint, 0)));
5787 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5788 TREE_SIDE_EFFECTS (t) = 1;
5789 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5791 u = make_tree (ptr_type_node, nextarg);
5792 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5793 TREE_SIDE_EFFECTS (t) = 1;
5794 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5797 /* Implement `va_arg'. */
5800 sh_va_arg (valist, type)
5803 HOST_WIDE_INT size, rsize;
5804 tree tmp, pptr_type_node;
5807 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5809 size = int_size_in_bytes (type);
5810 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5811 pptr_type_node = build_pointer_type (ptr_type_node);
5814 type = build_pointer_type (type);
5816 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5818 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5819 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5821 rtx lab_false, lab_over;
5823 f_next_o = TYPE_FIELDS (va_list_type_node);
5824 f_next_o_limit = TREE_CHAIN (f_next_o);
5825 f_next_fp = TREE_CHAIN (f_next_o_limit);
5826 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5827 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5829 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5830 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5831 valist, f_next_o_limit);
5832 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5834 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5835 valist, f_next_fp_limit);
5836 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5837 valist, f_next_stack);
5841 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5842 || (TREE_CODE (type) == COMPLEX_TYPE
5843 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5848 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5851 addr_rtx = gen_reg_rtx (Pmode);
5852 lab_false = gen_label_rtx ();
5853 lab_over = gen_label_rtx ();
5858 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5859 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5861 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5863 expand_expr (next_fp_limit, NULL_RTX,
5864 Pmode, EXPAND_NORMAL),
5865 GE, const1_rtx, Pmode, 1, lab_false);
5867 if (TYPE_ALIGN (type) > BITS_PER_WORD
5868 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5869 && (n_floatregs & 1)))
5871 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5872 build_int_2 (UNITS_PER_WORD, 0));
5873 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5874 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5875 TREE_SIDE_EFFECTS (tmp) = 1;
5876 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5879 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5880 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5882 emit_move_insn (addr_rtx, r);
5884 emit_jump_insn (gen_jump (lab_over));
5886 emit_label (lab_false);
5888 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5889 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5891 emit_move_insn (addr_rtx, r);
5895 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5896 build_int_2 (rsize, 0));
5898 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5900 expand_expr (next_o_limit, NULL_RTX,
5901 Pmode, EXPAND_NORMAL),
5902 GT, const1_rtx, Pmode, 1, lab_false);
5904 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5905 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5907 emit_move_insn (addr_rtx, r);
5909 emit_jump_insn (gen_jump (lab_over));
5911 emit_label (lab_false);
5913 if (size > 4 && ! TARGET_SH4)
5915 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5916 TREE_SIDE_EFFECTS (tmp) = 1;
5917 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5920 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5921 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5923 emit_move_insn (addr_rtx, r);
5926 emit_label (lab_over);
5928 tmp = make_tree (pptr_type_node, addr_rtx);
5929 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5932 /* ??? In va-sh.h, there had been code to make values larger than
5933 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5935 result = std_expand_builtin_va_arg (valist, type);
5938 #ifdef POINTERS_EXTEND_UNSIGNED
5939 if (GET_MODE (addr) != Pmode)
5940 addr = convert_memory_address (Pmode, result);
5942 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5943 set_mem_alias_set (result, get_varargs_alias_set ());
5945 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5946 argument to the varargs alias set. */
5950 /* Define the offset between two registers, one to be eliminated, and
5951 the other its replacement, at the start of a routine. */
5954 initial_elimination_offset (from, to)
5959 int regs_saved_rounding = 0;
5960 int total_saved_regs_space;
5961 int total_auto_space;
5962 int save_flags = target_flags;
5965 HARD_REG_SET live_regs_mask;
5966 regs_saved = calc_live_regs (&live_regs_mask);
5967 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5968 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5969 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5970 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5972 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5973 copy_flags = target_flags;
5974 target_flags = save_flags;
5976 total_saved_regs_space = regs_saved + regs_saved_rounding;
5978 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5979 return total_saved_regs_space + total_auto_space
5980 + current_function_args_info.byref_regs * 8;
5982 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5983 return total_saved_regs_space + total_auto_space
5984 + current_function_args_info.byref_regs * 8;
5986 /* Initial gap between fp and sp is 0. */
5987 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5990 if (from == RETURN_ADDRESS_POINTER_REGNUM
5991 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5995 int i, n = total_saved_regs_space;
5997 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5999 n += total_auto_space;
6001 /* If it wasn't saved, there's not much we can do. */
6002 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6005 target_flags = copy_flags;
6007 /* We loop twice: first, check 8-byte aligned registers,
6008 that are stored in the higher addresses, that are known
6009 to be aligned. Then, check 32-bit registers that don't
6010 need 8-byte alignment. */
6011 for (align = 1; align >= 0; align--)
6012 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6013 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6015 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6017 if (mode == SFmode && (i % 2) == 1
6018 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6019 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6025 /* If we're doing the aligned pass and this is not aligned,
6026 or we're doing the unaligned pass and this is aligned,
6028 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6032 n -= GET_MODE_SIZE (mode);
6036 target_flags = save_flags;
6044 return total_auto_space;
6050 /* Handle machine specific pragmas to be semi-compatible with Hitachi
6054 sh_pr_interrupt (pfile)
6055 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6057 pragma_interrupt = 1;
6062 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6064 pragma_interrupt = pragma_trapa = 1;
6068 sh_pr_nosave_low_regs (pfile)
6069 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6071 pragma_nosave_low_regs = 1;
6074 /* Generate 'handle_interrupt' attribute for decls */
6077 sh_insert_attributes (node, attributes)
6081 if (! pragma_interrupt
6082 || TREE_CODE (node) != FUNCTION_DECL)
6085 /* We are only interested in fields. */
6086 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6089 /* Add a 'handle_interrupt' attribute. */
6090 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6095 /* Supported attributes:
6097 interrupt_handler -- specifies this function is an interrupt handler.
6099 sp_switch -- specifies an alternate stack for an interrupt handler
6102 trap_exit -- use a trapa to exit an interrupt function instead of
6103 an rte instruction. */
6105 const struct attribute_spec sh_attribute_table[] =
6107 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6108 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6109 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6110 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6111 { NULL, 0, 0, false, false, false, NULL }
6114 /* Handle an "interrupt_handler" attribute; arguments as in
6115 struct attribute_spec.handler. */
6117 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6120 tree args ATTRIBUTE_UNUSED;
6121 int flags ATTRIBUTE_UNUSED;
6124 if (TREE_CODE (*node) != FUNCTION_DECL)
6126 warning ("`%s' attribute only applies to functions",
6127 IDENTIFIER_POINTER (name));
6128 *no_add_attrs = true;
6130 else if (TARGET_SHCOMPACT)
6132 error ("attribute interrupt_handler is not compatible with -m5-compact");
6133 *no_add_attrs = true;
6139 /* Handle an "sp_switch" attribute; arguments as in
6140 struct attribute_spec.handler. */
6142 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6146 int flags ATTRIBUTE_UNUSED;
6149 if (TREE_CODE (*node) != FUNCTION_DECL)
6151 warning ("`%s' attribute only applies to functions",
6152 IDENTIFIER_POINTER (name));
6153 *no_add_attrs = true;
6155 else if (!pragma_interrupt)
6157 /* The sp_switch attribute only has meaning for interrupt functions. */
6158 warning ("`%s' attribute only applies to interrupt functions",
6159 IDENTIFIER_POINTER (name));
6160 *no_add_attrs = true;
6162 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6164 /* The argument must be a constant string. */
6165 warning ("`%s' attribute argument not a string constant",
6166 IDENTIFIER_POINTER (name));
6167 *no_add_attrs = true;
6171 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6172 TREE_STRING_POINTER (TREE_VALUE (args)));
6178 /* Handle an "trap_exit" attribute; arguments as in
6179 struct attribute_spec.handler. */
6181 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6185 int flags ATTRIBUTE_UNUSED;
6188 if (TREE_CODE (*node) != FUNCTION_DECL)
6190 warning ("`%s' attribute only applies to functions",
6191 IDENTIFIER_POINTER (name));
6192 *no_add_attrs = true;
6194 else if (!pragma_interrupt)
6196 /* The trap_exit attribute only has meaning for interrupt functions. */
6197 warning ("`%s' attribute only applies to interrupt functions",
6198 IDENTIFIER_POINTER (name));
6199 *no_add_attrs = true;
6201 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6203 /* The argument must be a constant integer. */
6204 warning ("`%s' attribute argument not an integer constant",
6205 IDENTIFIER_POINTER (name));
6206 *no_add_attrs = true;
6210 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6217 sh_cfun_interrupt_handler_p ()
6219 return (lookup_attribute ("interrupt_handler",
6220 DECL_ATTRIBUTES (current_function_decl))
6224 /* Predicates used by the templates. */
6226 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6227 Used only in general_movsrc_operand. */
6230 system_reg_operand (op, mode)
6232 enum machine_mode mode ATTRIBUTE_UNUSED;
6244 /* Returns 1 if OP can be source of a simple move operation.
6245 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6246 invalid as are subregs of system registers. */
6249 general_movsrc_operand (op, mode)
6251 enum machine_mode mode;
6253 if (GET_CODE (op) == MEM)
6255 rtx inside = XEXP (op, 0);
6256 if (GET_CODE (inside) == CONST)
6257 inside = XEXP (inside, 0);
6259 if (GET_CODE (inside) == LABEL_REF)
6262 if (GET_CODE (inside) == PLUS
6263 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6264 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6267 /* Only post inc allowed. */
6268 if (GET_CODE (inside) == PRE_DEC)
6272 if ((mode == QImode || mode == HImode)
6273 && (GET_CODE (op) == SUBREG
6274 && GET_CODE (XEXP (op, 0)) == REG
6275 && system_reg_operand (XEXP (op, 0), mode)))
6278 return general_operand (op, mode);
6281 /* Returns 1 if OP can be a destination of a move.
6282 Same as general_operand, but no preinc allowed. */
6285 general_movdst_operand (op, mode)
6287 enum machine_mode mode;
6289 /* Only pre dec allowed. */
6290 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6293 return general_operand (op, mode);
6296 /* Returns 1 if OP is a normal arithmetic register. */
6299 arith_reg_operand (op, mode)
6301 enum machine_mode mode;
6303 if (register_operand (op, mode))
6307 if (GET_CODE (op) == REG)
6309 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6310 regno = REGNO (SUBREG_REG (op));
6314 return (regno != T_REG && regno != PR_REG
6315 && ! TARGET_REGISTER_P (regno)
6316 && (regno != FPUL_REG || TARGET_SH4)
6317 && regno != MACH_REG && regno != MACL_REG);
6322 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6323 because this would lead to missing sign extensions when truncating from
6324 DImode to SImode. */
6326 arith_reg_dest (op, mode)
6328 enum machine_mode mode;
6330 if (mode == DImode && GET_CODE (op) == SUBREG
6331 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6333 return arith_reg_operand (op, mode);
6337 int_gpr_dest (op, mode)
6339 enum machine_mode mode ATTRIBUTE_UNUSED;
6341 enum machine_mode op_mode = GET_MODE (op);
6343 if (GET_MODE_CLASS (op_mode) != MODE_INT
6344 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6346 if (! reload_completed)
6348 return true_regnum (op) <= LAST_GENERAL_REG;
6352 fp_arith_reg_operand (op, mode)
6354 enum machine_mode mode;
6356 if (register_operand (op, mode))
6360 if (GET_CODE (op) == REG)
6362 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6363 regno = REGNO (SUBREG_REG (op));
6367 return (regno >= FIRST_PSEUDO_REGISTER
6368 || FP_REGISTER_P (regno));
6373 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6376 arith_operand (op, mode)
6378 enum machine_mode mode;
6380 if (arith_reg_operand (op, mode))
6385 /* FIXME: We should be checking whether the CONST_INT fits in a
6386 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
6387 attempting to transform a sequence of two 64-bit sets of the
6388 same register from literal constants into a set and an add,
6389 when the difference is too wide for an add. */
6390 if (GET_CODE (op) == CONST_INT
6391 || EXTRA_CONSTRAINT_C16 (op))
6396 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
6402 /* Returns 1 if OP is a valid source operand for a compare insn. */
6405 arith_reg_or_0_operand (op, mode)
6407 enum machine_mode mode;
6409 if (arith_reg_operand (op, mode))
6412 if (EXTRA_CONSTRAINT_Z (op))
6418 /* Return 1 if OP is a valid source operand for an SHmedia operation
6419 that takes either a register or a 6-bit immediate. */
6422 shmedia_6bit_operand (op, mode)
6424 enum machine_mode mode;
6426 return (arith_reg_operand (op, mode)
6427 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
6430 /* Returns 1 if OP is a valid source operand for a logical operation. */
6433 logical_operand (op, mode)
6435 enum machine_mode mode;
6437 if (arith_reg_operand (op, mode))
6442 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
6447 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
6454 and_operand (op, mode)
6456 enum machine_mode mode;
6458 if (logical_operand (op, mode))
6461 /* Check mshflo.l / mshflhi.l opportunities. */
6464 && GET_CODE (op) == CONST_INT
6465 && CONST_OK_FOR_J16 (INTVAL (op)))
6471 /* Nonzero if OP is a floating point value with value 0.0. */
6474 fp_zero_operand (op)
6479 if (GET_MODE (op) != SFmode)
6482 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6483 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6486 /* Nonzero if OP is a floating point value with value 1.0. */
6494 if (GET_MODE (op) != SFmode)
6497 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6498 return REAL_VALUES_EQUAL (r, dconst1);
6501 /* For -m4 and -m4-single-only, mode switching is used. If we are
6502 compiling without -mfmovd, movsf_ie isn't taken into account for
6503 mode switching. We could check in machine_dependent_reorg for
6504 cases where we know we are in single precision mode, but there is
6505 interface to find that out during reload, so we must avoid
6506 choosing an fldi alternative during reload and thus failing to
6507 allocate a scratch register for the constant loading. */
6511 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6515 tertiary_reload_operand (op, mode)
6517 enum machine_mode mode ATTRIBUTE_UNUSED;
6519 enum rtx_code code = GET_CODE (op);
6520 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6524 fpscr_operand (op, mode)
6526 enum machine_mode mode ATTRIBUTE_UNUSED;
6528 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6529 && GET_MODE (op) == PSImode);
6533 fpul_operand (op, mode)
6535 enum machine_mode mode;
6538 return fp_arith_reg_operand (op, mode);
6540 return (GET_CODE (op) == REG
6541 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6542 && GET_MODE (op) == mode);
6546 symbol_ref_operand (op, mode)
6548 enum machine_mode mode ATTRIBUTE_UNUSED;
6550 return (GET_CODE (op) == SYMBOL_REF);
6553 /* Return the TLS type for TLS symbols, 0 for otherwise. */
6555 tls_symbolic_operand (op, mode)
6557 enum machine_mode mode ATTRIBUTE_UNUSED;
6559 if (GET_CODE (op) != SYMBOL_REF)
6561 return SYMBOL_REF_TLS_MODEL (op);
6565 commutative_float_operator (op, mode)
6567 enum machine_mode mode;
6569 if (GET_MODE (op) != mode)
6571 switch (GET_CODE (op))
6583 noncommutative_float_operator (op, mode)
6585 enum machine_mode mode;
6587 if (GET_MODE (op) != mode)
6589 switch (GET_CODE (op))
6601 unary_float_operator (op, mode)
6603 enum machine_mode mode;
6605 if (GET_MODE (op) != mode)
6607 switch (GET_CODE (op))
6620 binary_float_operator (op, mode)
6622 enum machine_mode mode;
6624 if (GET_MODE (op) != mode)
6626 switch (GET_CODE (op))
6640 binary_logical_operator (op, mode)
6642 enum machine_mode mode;
6644 if (GET_MODE (op) != mode)
6646 switch (GET_CODE (op))
6659 equality_comparison_operator (op, mode)
6661 enum machine_mode mode;
6663 return ((mode == VOIDmode || GET_MODE (op) == mode)
6664 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6667 int greater_comparison_operator (op, mode)
6669 enum machine_mode mode;
6671 if (mode != VOIDmode && GET_MODE (op) == mode)
6673 switch (GET_CODE (op))
6685 int less_comparison_operator (op, mode)
6687 enum machine_mode mode;
6689 if (mode != VOIDmode && GET_MODE (op) == mode)
6691 switch (GET_CODE (op))
6703 /* Accept pseudos and branch target registers. */
6705 target_reg_operand (op, mode)
6707 enum machine_mode mode;
6710 || GET_MODE (op) != DImode)
6713 if (GET_CODE (op) == SUBREG)
6716 if (GET_CODE (op) != REG)
6719 /* We must protect ourselves from matching pseudos that are virtual
6720 register, because they will eventually be replaced with hardware
6721 registers that aren't branch-target registers. */
6722 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6723 || TARGET_REGISTER_P (REGNO (op)))
6729 /* Same as target_reg_operand, except that label_refs and symbol_refs
6730 are accepted before reload. */
6732 target_operand (op, mode)
6734 enum machine_mode mode;
6739 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6740 && EXTRA_CONSTRAINT_Csy (op))
6741 return ! reload_completed;
6743 return target_reg_operand (op, mode);
6747 mextr_bit_offset (op, mode)
6749 enum machine_mode mode ATTRIBUTE_UNUSED;
6753 if (GET_CODE (op) != CONST_INT)
6756 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6760 extend_reg_operand (op, mode)
6762 enum machine_mode mode;
6764 return (GET_CODE (op) == TRUNCATE
6766 : arith_reg_operand) (op, mode);
6770 trunc_hi_operand (op, mode)
6772 enum machine_mode mode;
6774 enum machine_mode op_mode = GET_MODE (op);
6776 if (op_mode != SImode && op_mode != DImode
6777 && op_mode != V4HImode && op_mode != V2SImode)
6779 return extend_reg_operand (op, mode);
6783 extend_reg_or_0_operand (op, mode)
6785 enum machine_mode mode;
6787 return (GET_CODE (op) == TRUNCATE
6789 : arith_reg_or_0_operand) (op, mode);
6793 general_extend_operand (op, mode)
6795 enum machine_mode mode;
6797 return (GET_CODE (op) == TRUNCATE
6799 : nonimmediate_operand) (op, mode);
6803 inqhi_operand (op, mode)
6805 enum machine_mode mode;
6807 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6810 /* Can't use true_regnum here because copy_cost wants to know about
6811 SECONDARY_INPUT_RELOAD_CLASS. */
6812 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6816 sh_rep_vec (v, mode)
6818 enum machine_mode mode;
6823 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6824 || (GET_MODE (v) != mode && mode != VOIDmode))
6826 i = XVECLEN (v, 0) - 2;
6827 x = XVECEXP (v, 0, i + 1);
6828 if (GET_MODE_UNIT_SIZE (mode) == 1)
6830 y = XVECEXP (v, 0, i);
6831 for (i -= 2 ; i >= 0; i -= 2)
6832 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6833 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6838 if (XVECEXP (v, 0, i) != x)
6843 /* Determine if V is a constant vector matching MODE with only one element
6844 that is not a sign extension. Two byte-sized elements count as one. */
6846 sh_1el_vec (v, mode)
6848 enum machine_mode mode;
6851 int i, last, least, sign_ix;
6854 if (GET_CODE (v) != CONST_VECTOR
6855 || (GET_MODE (v) != mode && mode != VOIDmode))
6857 /* Determine numbers of last and of least significant elements. */
6858 last = XVECLEN (v, 0) - 1;
6859 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6860 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6863 if (GET_MODE_UNIT_SIZE (mode) == 1)
6864 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6865 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6867 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6868 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6869 ? constm1_rtx : const0_rtx);
6870 i = XVECLEN (v, 0) - 1;
6872 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6879 sh_const_vec (v, mode)
6881 enum machine_mode mode;
6885 if (GET_CODE (v) != CONST_VECTOR
6886 || (GET_MODE (v) != mode && mode != VOIDmode))
6888 i = XVECLEN (v, 0) - 1;
6890 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6895 /* Return the destination address of a branch. */
6898 branch_dest (branch)
6901 rtx dest = SET_SRC (PATTERN (branch));
6904 if (GET_CODE (dest) == IF_THEN_ELSE)
6905 dest = XEXP (dest, 1);
6906 dest = XEXP (dest, 0);
6907 dest_uid = INSN_UID (dest);
6908 return INSN_ADDRESSES (dest_uid);
6911 /* Return nonzero if REG is not used after INSN.
6912 We assume REG is a reload reg, and therefore does
6913 not live past labels. It may live past calls or jumps though. */
6915 reg_unused_after (reg, insn)
6922 /* If the reg is set by this instruction, then it is safe for our
6923 case. Disregard the case where this is a store to memory, since
6924 we are checking a register used in the store address. */
6925 set = single_set (insn);
6926 if (set && GET_CODE (SET_DEST (set)) != MEM
6927 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6930 while ((insn = NEXT_INSN (insn)))
6932 code = GET_CODE (insn);
6935 /* If this is a label that existed before reload, then the register
6936 if dead here. However, if this is a label added by reorg, then
6937 the register may still be live here. We can't tell the difference,
6938 so we just ignore labels completely. */
6939 if (code == CODE_LABEL)
6944 if (code == JUMP_INSN)
6947 /* If this is a sequence, we must handle them all at once.
6948 We could have for instance a call that sets the target register,
6949 and an insn in a delay slot that uses the register. In this case,
6950 we must return 0. */
6951 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6956 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6958 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6959 rtx set = single_set (this_insn);
6961 if (GET_CODE (this_insn) == CALL_INSN)
6963 else if (GET_CODE (this_insn) == JUMP_INSN)
6965 if (INSN_ANNULLED_BRANCH_P (this_insn))
6970 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6972 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6974 if (GET_CODE (SET_DEST (set)) != MEM)
6980 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6985 else if (code == JUMP_INSN)
6988 else if (GET_RTX_CLASS (code) == 'i')
6990 rtx set = single_set (insn);
6992 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6994 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6995 return GET_CODE (SET_DEST (set)) != MEM;
6996 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7000 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7008 static GTY(()) rtx fpscr_rtx;
7014 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7015 REG_USERVAR_P (fpscr_rtx) = 1;
7016 mark_user_reg (fpscr_rtx);
7018 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7019 mark_user_reg (fpscr_rtx);
7038 expand_sf_unop (fun, operands)
7039 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7042 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7046 expand_sf_binop (fun, operands)
7047 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7050 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7055 expand_df_unop (fun, operands)
7056 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7059 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7063 expand_df_binop (fun, operands)
7064 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7067 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7071 /* ??? gcc does flow analysis strictly after common subexpression
7072 elimination. As a result, common subexpression elimination fails
7073 when there are some intervening statements setting the same register.
7074 If we did nothing about this, this would hurt the precision switching
7075 for SH4 badly. There is some cse after reload, but it is unable to
7076 undo the extra register pressure from the unused instructions, and
7077 it cannot remove auto-increment loads.
7079 A C code example that shows this flow/cse weakness for (at least) SH
7080 and sparc (as of gcc ss-970706) is this:
7094 So we add another pass before common subexpression elimination, to
7095 remove assignments that are dead due to a following assignment in the
7096 same basic block. */
7099 mark_use (x, reg_set_block)
7100 rtx x, *reg_set_block;
7106 code = GET_CODE (x);
7111 int regno = REGNO (x);
7112 int nregs = (regno < FIRST_PSEUDO_REGISTER
7113 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7117 reg_set_block[regno + nregs - 1] = 0;
7124 rtx dest = SET_DEST (x);
7126 if (GET_CODE (dest) == SUBREG)
7127 dest = SUBREG_REG (dest);
7128 if (GET_CODE (dest) != REG)
7129 mark_use (dest, reg_set_block);
7130 mark_use (SET_SRC (x), reg_set_block);
7137 const char *fmt = GET_RTX_FORMAT (code);
7139 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7142 mark_use (XEXP (x, i), reg_set_block);
7143 else if (fmt[i] == 'E')
7144 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7145 mark_use (XVECEXP (x, i, j), reg_set_block);
7152 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7154 /* This function returns a register to use to load the address to load
7155 the fpscr from. Currently it always returns r1 or r7, but when we are
7156 able to use pseudo registers after combine, or have a better mechanism
7157 for choosing a register, it should be done here. */
7158 /* REGS_LIVE is the liveness information for the point for which we
7159 need this allocation. In some bare-bones exit blocks, r1 is live at the
7160 start. We can even have all of r0..r3 being live:
7161 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7162 INSN before which new insns are placed with will clobber the register
7163 we return. If a basic block consists only of setting the return value
7164 register to a pseudo and using that register, the return value is not
7165 live before or after this block, yet we we'll insert our insns right in
7169 get_free_reg (regs_live)
7170 HARD_REG_SET regs_live;
7172 if (! TEST_HARD_REG_BIT (regs_live, 1))
7173 return gen_rtx_REG (Pmode, 1);
7175 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7176 there shouldn't be anything but a jump before the function end. */
7177 if (! TEST_HARD_REG_BIT (regs_live, 7))
7178 return gen_rtx_REG (Pmode, 7);
7183 /* This function will set the fpscr from memory.
7184 MODE is the mode we are setting it to. */
7186 fpscr_set_from_mem (mode, regs_live)
7188 HARD_REG_SET regs_live;
7190 enum attr_fp_mode fp_mode = mode;
7191 rtx addr_reg = get_free_reg (regs_live);
7193 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7194 emit_insn (gen_fpu_switch1 (addr_reg));
7196 emit_insn (gen_fpu_switch0 (addr_reg));
7199 /* Is the given character a logical line separator for the assembler? */
7200 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7201 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7205 sh_insn_length_adjustment (insn)
7208 /* Instructions with unfilled delay slots take up an extra two bytes for
7209 the nop in the delay slot. */
7210 if (((GET_CODE (insn) == INSN
7211 && GET_CODE (PATTERN (insn)) != USE
7212 && GET_CODE (PATTERN (insn)) != CLOBBER)
7213 || GET_CODE (insn) == CALL_INSN
7214 || (GET_CODE (insn) == JUMP_INSN
7215 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7216 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7217 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7218 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7221 /* SH2e has a bug that prevents the use of annulled branches, so if
7222 the delay slot is not filled, we'll have to put a NOP in it. */
7223 if (sh_cpu == CPU_SH2E
7224 && GET_CODE (insn) == JUMP_INSN
7225 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7226 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7227 && get_attr_type (insn) == TYPE_CBRANCH
7228 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7231 /* sh-dsp parallel processing insn take four bytes instead of two. */
7233 if (GET_CODE (insn) == INSN)
7236 rtx body = PATTERN (insn);
7237 const char *template;
7239 int maybe_label = 1;
7241 if (GET_CODE (body) == ASM_INPUT)
7242 template = XSTR (body, 0);
7243 else if (asm_noperands (body) >= 0)
7245 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7254 while (c == ' ' || c == '\t');
7255 /* all sh-dsp parallel-processing insns start with p.
7256 The only non-ppi sh insn starting with p is pref.
7257 The only ppi starting with pr is prnd. */
7258 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7260 /* The repeat pseudo-insn expands two three insns, a total of
7261 six bytes in size. */
7262 else if ((c == 'r' || c == 'R')
7263 && ! strncasecmp ("epeat", template, 5))
7265 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7267 /* If this is a label, it is obviously not a ppi insn. */
7268 if (c == ':' && maybe_label)
7273 else if (c == '\'' || c == '"')
7278 maybe_label = c != ':';
7286 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7287 isn't protected by a PIC unspec. */
7289 nonpic_symbol_mentioned_p (x)
7292 register const char *fmt;
7295 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7296 || GET_CODE (x) == PC)
7299 /* We don't want to look into the possible MEM location of a
7300 CONST_DOUBLE, since we're not going to use it, in general. */
7301 if (GET_CODE (x) == CONST_DOUBLE)
7304 if (GET_CODE (x) == UNSPEC
7305 && (XINT (x, 1) == UNSPEC_PIC
7306 || XINT (x, 1) == UNSPEC_GOT
7307 || XINT (x, 1) == UNSPEC_GOTOFF
7308 || XINT (x, 1) == UNSPEC_GOTPLT
7309 || XINT (x, 1) == UNSPEC_GOTTPOFF
7310 || XINT (x, 1) == UNSPEC_DTPOFF
7311 || XINT (x, 1) == UNSPEC_PLT))
7314 fmt = GET_RTX_FORMAT (GET_CODE (x));
7315 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7321 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7322 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7325 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7332 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7333 @GOTOFF in `reg'. */
7335 legitimize_pic_address (orig, mode, reg)
7337 enum machine_mode mode ATTRIBUTE_UNUSED;
7340 if (tls_symbolic_operand (orig, Pmode))
7343 if (GET_CODE (orig) == LABEL_REF
7344 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7347 reg = gen_reg_rtx (Pmode);
7349 emit_insn (gen_symGOTOFF2reg (reg, orig));
7352 else if (GET_CODE (orig) == SYMBOL_REF)
7355 reg = gen_reg_rtx (Pmode);
7357 emit_insn (gen_symGOT2reg (reg, orig));
7363 /* Mark the use of a constant in the literal table. If the constant
7364 has multiple labels, make it unique. */
7366 mark_constant_pool_use (x)
7369 rtx insn, lab, pattern;
7374 switch (GET_CODE (x))
7384 /* Get the first label in the list of labels for the same constant
7385 and delete another labels in the list. */
7387 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7389 if (GET_CODE (insn) != CODE_LABEL
7390 || LABEL_REFS (insn) != NEXT_INSN (insn))
7395 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7396 INSN_DELETED_P (insn) = 1;
7398 /* Mark constants in a window. */
7399 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7401 if (GET_CODE (insn) != INSN)
7404 pattern = PATTERN (insn);
7405 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7408 switch (XINT (pattern, 1))
7410 case UNSPECV_CONST2:
7411 case UNSPECV_CONST4:
7412 case UNSPECV_CONST8:
7413 XVECEXP (pattern, 0, 1) = const1_rtx;
7415 case UNSPECV_WINDOW_END:
7416 if (XVECEXP (pattern, 0, 0) == x)
7419 case UNSPECV_CONST_END:
7429 /* Return true if it's possible to redirect BRANCH1 to the destination
7430 of an unconditional jump BRANCH2. We only want to do this if the
7431 resulting branch will have a short displacement. */
7433 sh_can_redirect_branch (branch1, branch2)
7437 if (flag_expensive_optimizations && simplejump_p (branch2))
7439 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7443 for (distance = 0, insn = NEXT_INSN (branch1);
7444 insn && distance < 256;
7445 insn = PREV_INSN (insn))
7450 distance += get_attr_length (insn);
7452 for (distance = 0, insn = NEXT_INSN (branch1);
7453 insn && distance < 256;
7454 insn = NEXT_INSN (insn))
7459 distance += get_attr_length (insn);
7465 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7467 sh_hard_regno_rename_ok (old_reg, new_reg)
7468 unsigned int old_reg ATTRIBUTE_UNUSED;
7469 unsigned int new_reg;
7472 /* Interrupt functions can only use registers that have already been
7473 saved by the prologue, even if they would normally be
7476 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7482 /* Function to update the integer COST
7483 based on the relationship between INSN that is dependent on
7484 DEP_INSN through the dependence LINK. The default is to make no
7485 adjustment to COST. This can be used for example to specify to
7486 the scheduler that an output- or anti-dependence does not incur
7487 the same cost as a data-dependence. The return value should be
7488 the new value for COST. */
7490 sh_adjust_cost (insn, link, dep_insn, cost)
7492 rtx link ATTRIBUTE_UNUSED;
7500 /* On SHmedia, if the dependence is an anti-dependence or
7501 output-dependence, there is no cost. */
7502 if (REG_NOTE_KIND (link) != 0)
7505 if (get_attr_is_mac_media (insn)
7506 && get_attr_is_mac_media (dep_insn))
7509 else if (REG_NOTE_KIND (link) == 0)
7511 enum attr_type dep_type, type;
7513 if (recog_memoized (insn) < 0
7514 || recog_memoized (dep_insn) < 0)
7517 dep_type = get_attr_type (dep_insn);
7518 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7520 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7521 && (type = get_attr_type (insn)) != TYPE_CALL
7522 && type != TYPE_SFUNC)
7525 /* The only input for a call that is timing-critical is the
7526 function's address. */
7527 if (GET_CODE(insn) == CALL_INSN)
7529 rtx call = PATTERN (insn);
7531 if (GET_CODE (call) == PARALLEL)
7532 call = XVECEXP (call, 0 ,0);
7533 if (GET_CODE (call) == SET)
7534 call = SET_SRC (call);
7535 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7536 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7539 /* Likewise, the most timing critical input for an sfuncs call
7540 is the function address. However, sfuncs typically start
7541 using their arguments pretty quickly.
7542 Assume a four cycle delay before they are needed. */
7543 /* All sfunc calls are parallels with at least four components.
7544 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7545 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7546 && XVECLEN (PATTERN (insn), 0) >= 4
7547 && (reg = sfunc_uses_reg (insn)))
7549 if (! reg_set_p (reg, dep_insn))
7552 /* When the preceding instruction loads the shift amount of
7553 the following SHAD/SHLD, the latency of the load is increased
7556 && get_attr_type (insn) == TYPE_DYN_SHIFT
7557 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7558 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7559 XEXP (SET_SRC (single_set(insn)),
7562 /* When an LS group instruction with a latency of less than
7563 3 cycles is followed by a double-precision floating-point
7564 instruction, FIPR, or FTRV, the latency of the first
7565 instruction is increased to 3 cycles. */
7567 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7568 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7570 /* The lsw register of a double-precision computation is ready one
7572 else if (reload_completed
7573 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7574 && (use_pat = single_set (insn))
7575 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7579 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7580 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7583 /* An anti-dependence penalty of two applies if the first insn is a double
7584 precision fadd / fsub / fmul. */
7585 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7586 && recog_memoized (dep_insn) >= 0
7587 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7588 /* A lot of alleged anti-flow dependences are fake,
7589 so check this one is real. */
7590 && flow_dependent_p (dep_insn, insn))
7597 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7598 if DEP_INSN is anti-flow dependent on INSN. */
7600 flow_dependent_p (insn, dep_insn)
7603 rtx tmp = PATTERN (insn);
7605 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7606 return tmp == NULL_RTX;
7609 /* A helper function for flow_dependent_p called through note_stores. */
7611 flow_dependent_p_1 (x, pat, data)
7613 rtx pat ATTRIBUTE_UNUSED;
7616 rtx * pinsn = (rtx *) data;
7618 if (*pinsn && reg_referenced_p (x, *pinsn))
7622 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7623 'special function' patterns (type sfunc) that clobber pr, but that
7624 do not look like function calls to leaf_function_p. Hence we must
7625 do this extra check. */
7629 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7632 /* This Function returns nonzero if the DFA based scheduler interface
7633 is to be used. At present this is supported for the SH4 only. */
7635 sh_use_dfa_interface()
7637 if (TARGET_HARD_SH4)
7643 /* This function returns "2" to indicate dual issue for the SH4
7644 processor. To be used by the DFA pipeline description. */
7648 if (TARGET_SUPERSCALAR)
7654 /* SHmedia requires registers for branches, so we can't generate new
7655 branches past reload. */
7657 sh_cannot_modify_jumps_p ()
7659 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7663 sh_ms_bitfield_layout_p (record_type)
7664 tree record_type ATTRIBUTE_UNUSED;
7670 On the SH1..SH4, the trampoline looks like
7671 2 0002 D202 mov.l l2,r2
7672 1 0000 D301 mov.l l1,r3
7675 5 0008 00000000 l1: .long area
7676 6 000c 00000000 l2: .long function
7678 SH5 (compact) uses r1 instead of r3 for the static chain. */
7681 /* Emit RTL insns to initialize the variable parts of a trampoline.
7682 FNADDR is an RTX for the address of the function's pure code.
7683 CXT is an RTX for the static chain value for the function. */
7686 sh_initialize_trampoline (tramp, fnaddr, cxt)
7687 rtx tramp, fnaddr, cxt;
7689 if (TARGET_SHMEDIA64)
7694 rtx movi1 = GEN_INT (0xcc000010);
7695 rtx shori1 = GEN_INT (0xc8000010);
7698 /* The following trampoline works within a +- 128 KB range for cxt:
7699 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7700 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7701 gettr tr1,r1; blink tr0,r63 */
7702 /* Address rounding makes it hard to compute the exact bounds of the
7703 offset for this trampoline, but we have a rather generous offset
7704 range, so frame_offset should do fine as an upper bound. */
7705 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7707 /* ??? could optimize this trampoline initialization
7708 by writing DImode words with two insns each. */
7709 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7710 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7711 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7712 insn = gen_rtx_AND (DImode, insn, mask);
7713 /* Or in ptb/u .,tr1 pattern */
7714 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7715 insn = force_operand (insn, NULL_RTX);
7716 insn = gen_lowpart (SImode, insn);
7717 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7718 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7719 insn = gen_rtx_AND (DImode, insn, mask);
7720 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7721 insn = gen_lowpart (SImode, insn);
7722 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7723 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7724 insn = gen_rtx_AND (DImode, insn, mask);
7725 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7726 insn = gen_lowpart (SImode, insn);
7727 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7728 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7729 insn = gen_rtx_AND (DImode, insn, mask);
7730 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7731 insn = gen_lowpart (SImode, insn);
7732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7734 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7735 insn = gen_rtx_AND (DImode, insn, mask);
7736 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7737 insn = gen_lowpart (SImode, insn);
7738 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7740 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7741 GEN_INT (0x6bf10600));
7742 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7743 GEN_INT (0x4415fc10));
7744 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7745 GEN_INT (0x4401fff0));
7746 emit_insn (gen_ic_invalidate_line (tramp));
7749 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7750 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7752 tramp_templ = gen_datalabel_ref (tramp_templ);
7753 dst = gen_rtx_MEM (BLKmode, tramp);
7754 src = gen_rtx_MEM (BLKmode, tramp_templ);
7755 set_mem_align (dst, 256);
7756 set_mem_align (src, 64);
7757 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7759 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7761 emit_move_insn (gen_rtx_MEM (Pmode,
7762 plus_constant (tramp,
7764 + GET_MODE_SIZE (Pmode))),
7766 emit_insn (gen_ic_invalidate_line (tramp));
7769 else if (TARGET_SHMEDIA)
7771 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7772 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7773 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7774 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7775 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7776 rotated 10 right, and higher 16 bit of every 32 selected. */
7778 = force_reg (V2HImode, (simplify_gen_subreg
7779 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7780 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7781 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7783 tramp = force_reg (Pmode, tramp);
7784 fnaddr = force_reg (SImode, fnaddr);
7785 cxt = force_reg (SImode, cxt);
7786 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7787 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7789 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7790 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7791 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7792 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7793 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7794 gen_rtx_SUBREG (V2HImode, cxt, 0),
7796 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7797 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7798 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7799 if (TARGET_LITTLE_ENDIAN)
7801 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7802 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7806 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7807 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7809 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7810 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7811 emit_insn (gen_ic_invalidate_line (tramp));
7814 else if (TARGET_SHCOMPACT)
7816 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7819 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7820 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7822 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7823 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7827 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7831 if (TARGET_USERMODE)
7832 emit_library_call (function_symbol ("__ic_invalidate"),
7833 0, VOIDmode, 1, tramp, SImode);
7835 emit_insn (gen_ic_invalidate_line (tramp));
7839 /* FIXME: This is overly conservative. A SHcompact function that
7840 receives arguments ``by reference'' will have them stored in its
7841 own stack frame, so it must not pass pointers or references to
7842 these arguments to other functions by means of sibling calls. */
7844 sh_function_ok_for_sibcall (decl, exp)
7846 tree exp ATTRIBUTE_UNUSED;
7849 && (! TARGET_SHCOMPACT
7850 || current_function_args_info.stack_regs == 0)
7851 && ! sh_cfun_interrupt_handler_p ());
7854 /* Machine specific built-in functions. */
7856 struct builtin_description
7858 const enum insn_code icode;
7859 const char *const name;
7863 /* describe number and signedness of arguments; arg[0] == result
7864 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
7865 static const char signature_args[][4] =
7867 #define SH_BLTIN_V2SI2 0
7869 #define SH_BLTIN_V4HI2 1
7871 #define SH_BLTIN_V2SI3 2
7873 #define SH_BLTIN_V4HI3 3
7875 #define SH_BLTIN_V8QI3 4
7877 #define SH_BLTIN_MAC_HISI 5
7879 #define SH_BLTIN_SH_HI 6
7881 #define SH_BLTIN_SH_SI 7
7883 #define SH_BLTIN_V4HI2V2SI 8
7885 #define SH_BLTIN_V4HI2V8QI 9
7887 #define SH_BLTIN_SISF 10
7889 #define SH_BLTIN_LDUA_L 11
7891 #define SH_BLTIN_LDUA_Q 12
7893 #define SH_BLTIN_STUA_L 13
7895 #define SH_BLTIN_STUA_Q 14
7897 #define SH_BLTIN_UDI 15
7899 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7900 #define SH_BLTIN_2 16
7901 #define SH_BLTIN_SU 16
7903 #define SH_BLTIN_3 17
7904 #define SH_BLTIN_SUS 17
7906 #define SH_BLTIN_PSSV 18
7908 #define SH_BLTIN_XXUU 19
7909 #define SH_BLTIN_UUUU 19
7911 #define SH_BLTIN_PV 20
7914 /* mcmv: operands considered unsigned. */
7915 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7916 /* mperm: control value considered unsigned int. */
7917 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7918 /* mshards_q: returns signed short. */
7919 /* nsb: takes long long arg, returns unsigned char. */
7920 static const struct builtin_description bdesc[] =
7922 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7923 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7924 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7925 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7926 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7927 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7928 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7930 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7931 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7933 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7934 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7935 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7936 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7937 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7938 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7939 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7940 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7941 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7942 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7943 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7944 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7945 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7946 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7947 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7948 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7949 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7950 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7951 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7952 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
7953 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7954 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7955 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7956 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7957 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7958 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7959 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7960 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7961 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7962 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7963 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7964 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7965 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7966 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7967 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7968 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7969 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7970 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7971 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7972 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7973 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7974 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7975 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7976 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7977 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7978 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7979 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7980 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7981 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7982 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7983 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7984 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7985 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7986 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7988 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7989 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7990 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7991 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7992 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7993 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7994 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7995 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7996 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7997 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7998 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7999 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8000 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8001 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8002 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8003 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8005 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
8006 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
8008 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8009 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
8014 sh_media_init_builtins ()
8016 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8017 const struct builtin_description *d;
8019 memset (shared, 0, sizeof shared);
8020 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8022 tree type, arg_type;
8023 int signature = d->signature;
8026 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8027 type = shared[signature];
8030 int has_result = signature_args[signature][0] != 0;
8032 if (signature_args[signature][1] == 8
8033 && (insn_data[d->icode].operand[has_result].mode != Pmode))
8035 if (! TARGET_FPU_ANY
8036 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
8038 type = void_list_node;
8041 int arg = signature_args[signature][i];
8042 int opno = i - 1 + has_result;
8045 arg_type = ptr_type_node;
8047 arg_type = ((*lang_hooks.types.type_for_mode)
8048 (insn_data[d->icode].operand[opno].mode,
8053 arg_type = void_type_node;
8056 type = tree_cons (NULL_TREE, arg_type, type);
8058 type = build_function_type (arg_type, type);
8059 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8060 shared[signature] = type;
8062 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8071 sh_media_init_builtins ();
8074 /* Expand an expression EXP that calls a built-in function,
8075 with result going to TARGET if that's convenient
8076 (and in mode MODE if that's convenient).
8077 SUBTARGET may be used as the target for computing one of EXP's operands.
8078 IGNORE is nonzero if the value is to be ignored. */
8081 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8084 rtx subtarget ATTRIBUTE_UNUSED;
8085 enum machine_mode mode ATTRIBUTE_UNUSED;
8088 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8089 tree arglist = TREE_OPERAND (exp, 1);
8090 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8091 const struct builtin_description *d = &bdesc[fcode];
8092 enum insn_code icode = d->icode;
8093 int signature = d->signature;
8094 enum machine_mode tmode = VOIDmode;
8099 if (signature_args[signature][0])
8104 tmode = insn_data[icode].operand[0].mode;
8106 || GET_MODE (target) != tmode
8107 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8108 target = gen_reg_rtx (tmode);
8114 for (i = 1; i <= 3; i++, nop++)
8117 enum machine_mode opmode, argmode;
8119 if (! signature_args[signature][i])
8121 arg = TREE_VALUE (arglist);
8122 if (arg == error_mark_node)
8124 arglist = TREE_CHAIN (arglist);
8125 opmode = insn_data[icode].operand[nop].mode;
8126 argmode = TYPE_MODE (TREE_TYPE (arg));
8127 if (argmode != opmode)
8128 arg = build1 (NOP_EXPR,
8129 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8130 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
8131 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8132 op[nop] = copy_to_mode_reg (opmode, op[nop]);
8138 pat = (*insn_data[d->icode].genfun) (op[0]);
8141 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8144 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8147 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
8159 sh_expand_unop_v2sf (code, op0, op1)
8163 rtx sel0 = const0_rtx;
8164 rtx sel1 = const1_rtx;
8165 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
8166 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8168 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8169 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
8173 sh_expand_binop_v2sf (code, op0, op1, op2)
8177 rtx sel0 = const0_rtx;
8178 rtx sel1 = const1_rtx;
8179 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
8181 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8183 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8184 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8187 /* Return the class of registers for which a mode change from FROM to TO
8190 sh_cannot_change_mode_class (from, to, class)
8191 enum machine_mode from, to;
8192 enum reg_class class;
8194 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8196 if (TARGET_LITTLE_ENDIAN)
8198 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8199 return reg_classes_intersect_p (DF_REGS, class);
8203 if (GET_MODE_SIZE (from) < 8)
8204 return reg_classes_intersect_p (DF_HI_REGS, class);
8211 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8212 that label is used. */
8215 sh_mark_label (address, nuses)
8219 if (GOTOFF_P (address))
8221 /* Extract the label or symbol. */
8222 address = XEXP (address, 0);
8223 if (GET_CODE (address) == PLUS)
8224 address = XEXP (address, 0);
8225 address = XVECEXP (address, 0, 0);
8227 if (GET_CODE (address) == LABEL_REF
8228 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8229 LABEL_NUSES (XEXP (address, 0)) += nuses;
8232 /* Compute extra cost of moving data between one register class
8235 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8236 uses this information. Hence, the general register <-> floating point
8237 register information here is not used for SFmode. */
8240 sh_register_move_cost (mode, srcclass, dstclass)
8241 enum machine_mode mode;
8242 enum reg_class srcclass, dstclass;
8244 if (dstclass == T_REGS || dstclass == PR_REGS)
8247 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8248 && REGCLASS_HAS_FP_REG (srcclass)
8249 && REGCLASS_HAS_FP_REG (dstclass))
8252 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
8253 || (dstclass== MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
8256 if ((REGCLASS_HAS_FP_REG (dstclass)
8257 && REGCLASS_HAS_GENERAL_REG (srcclass))
8258 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8259 && REGCLASS_HAS_FP_REG (srcclass)))
8260 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8261 * ((GET_MODE_SIZE (mode) + 7) / 8U));
8263 if ((dstclass == FPUL_REGS
8264 && REGCLASS_HAS_GENERAL_REG (srcclass))
8265 || (srcclass == FPUL_REGS
8266 && REGCLASS_HAS_GENERAL_REG (dstclass)))
8269 if ((dstclass == FPUL_REGS
8270 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8271 || (srcclass == FPUL_REGS
8272 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
8275 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8276 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8279 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8280 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8285 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8286 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8287 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
8289 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8292 /* Like register_operand, but take into account that SHMEDIA can use
8293 the constant zero like a general register. */
8295 sh_register_operand (op, mode)
8297 enum machine_mode mode;
8299 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8301 return register_operand (op, mode);
8304 static rtx emit_load_ptr PARAMS ((rtx, rtx));
8307 emit_load_ptr (reg, addr)
8310 rtx mem = gen_rtx_MEM (ptr_mode, addr);
8312 if (Pmode != ptr_mode)
8313 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8314 return emit_move_insn (reg, mem);
8318 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8320 tree thunk_fndecl ATTRIBUTE_UNUSED;
8321 HOST_WIDE_INT delta;
8322 HOST_WIDE_INT vcall_offset;
8325 CUMULATIVE_ARGS cum;
8326 int structure_value_byref = 0;
8327 rtx this, this_value, sibcall, insns, funexp;
8328 tree funtype = TREE_TYPE (function);
8329 int simple_add = CONST_OK_FOR_ADD (delta);
8331 rtx scratch0, scratch1, scratch2;
8333 reload_completed = 1;
8335 current_function_uses_only_leaf_regs = 1;
8337 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8339 /* Find the "this" pointer. We have such a wide range of ABIs for the
8340 SH that it's best to do this completely machine independently.
8341 "this" is passed as first argument, unless a structure return pointer
8342 comes first, in which case "this" comes second. */
8343 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8344 #ifndef PCC_STATIC_STRUCT_RETURN
8345 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8346 structure_value_byref = 1;
8347 #endif /* not PCC_STATIC_STRUCT_RETURN */
8348 if (structure_value_byref && struct_value_rtx == 0)
8350 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8352 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8354 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8356 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8357 static chain pointer (even if you can't have nested virtual functions
8358 right now, someone might implement them sometime), and the rest of the
8359 registers are used for argument passing, are callee-saved, or reserved. */
8360 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
8363 scratch1 = gen_rtx_REG (ptr_mode, 1);
8364 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8365 pointing where to return struct values. */
8366 scratch2 = gen_rtx_REG (Pmode, 3);
8368 else if (TARGET_SHMEDIA)
8370 scratch1 = gen_rtx_REG (ptr_mode, 21);
8371 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
8374 this_value = plus_constant (this, delta);
8376 && (simple_add || scratch0 != scratch1)
8377 && strict_memory_address_p (ptr_mode, this_value))
8379 emit_load_ptr (scratch0, this_value);
8385 else if (simple_add)
8386 emit_move_insn (this, this_value);
8389 emit_move_insn (scratch1, GEN_INT (delta));
8390 emit_insn (gen_add2_insn (this, scratch1));
8398 emit_load_ptr (scratch0, this);
8400 offset_addr = plus_constant (scratch0, vcall_offset);
8401 if (strict_memory_address_p (ptr_mode, offset_addr))
8403 else if (! TARGET_SH5)
8405 /* scratch0 != scratch1, and we have indexed loads. Get better
8406 schedule by loading the offset into r1 and using an indexed
8407 load - then the load of r1 can issue before the load from
8408 (this + delta) finishes. */
8409 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8410 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
8412 else if (CONST_OK_FOR_ADD (vcall_offset))
8414 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8415 offset_addr = scratch0;
8417 else if (scratch0 != scratch1)
8419 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8420 emit_insn (gen_add2_insn (scratch0, scratch1));
8421 offset_addr = scratch0;
8424 abort (); /* FIXME */
8425 emit_load_ptr (scratch0, offset_addr);
8427 if (Pmode != ptr_mode)
8428 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8429 emit_insn (gen_add2_insn (this, scratch0));
8432 /* Generate a tail call to the target function. */
8433 if (! TREE_USED (function))
8435 assemble_external (function);
8436 TREE_USED (function) = 1;
8438 funexp = XEXP (DECL_RTL (function), 0);
8439 emit_move_insn (scratch2, funexp);
8440 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8441 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8442 SIBLING_CALL_P (sibcall) = 1;
8443 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8446 /* Run just enough of rest_of_compilation to do scheduling and get
8447 the insns emitted. Note that use_thunk calls
8448 assemble_start_function and assemble_end_function. */
8450 insn_locators_initialize ();
8451 insns = get_insns ();
8453 if (optimize > 0 && flag_schedule_insns_after_reload)
8456 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8457 life_analysis (insns, rtl_dump_file, PROP_FINAL);
8459 split_all_insns (1);
8461 schedule_insns (rtl_dump_file);
8466 if (optimize > 0 && flag_delayed_branch)
8467 dbr_schedule (insns, rtl_dump_file);
8468 shorten_branches (insns);
8469 final_start_function (insns, file, 1);
8470 final (insns, file, 1, 0);
8471 final_end_function ();
8473 if (optimize > 0 && flag_schedule_insns_after_reload)
8475 /* Release all memory allocated by flow. */
8476 free_basic_block_vars (0);
8478 /* Release all memory held by regsets now. */
8479 regset_release_memory ();
8482 reload_completed = 0;
8487 function_symbol (const char *name)
8489 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
8490 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;