1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
52 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
53 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
55 /* These are some macros to abstract register modes. */
56 #define CONST_OK_FOR_ADD(size) \
57 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
58 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
59 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
60 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
62 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
63 int current_function_interrupt;
65 /* ??? The pragma interrupt support will not work for SH3. */
66 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
67 output code for the next function appropriate for an interrupt handler. */
70 /* This is set by the trap_exit attribute for functions. It specifies
71 a trap number to be used in a trapa instruction at function exit
72 (instead of an rte instruction). */
75 /* This is used by the sp_switch attribute for functions. It specifies
76 a variable holding the address of the stack the interrupt function
77 should switch to/from at entry/exit. */
80 /* This is set by #pragma trapa, and is similar to the above, except that
81 the compiler doesn't emit code to preserve all registers. */
82 static int pragma_trapa;
84 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
85 which has a separate set of low regs for User and Supervisor modes.
86 This should only be used for the lowest level of interrupts. Higher levels
87 of interrupts must save the registers in case they themselves are
89 int pragma_nosave_low_regs;
91 /* This is used for communication between SETUP_INCOMING_VARARGS and
92 sh_expand_prologue. */
93 int current_function_anonymous_args;
95 /* Global variables for machine-dependent things. */
97 /* Which cpu are we scheduling for. */
98 enum processor_type sh_cpu;
100 /* Saved operands from the last compare to use when we generate an scc
106 /* Provides the class number of the smallest class containing
109 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
111 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
144 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
145 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
146 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
147 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
148 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
152 char sh_register_names[FIRST_PSEUDO_REGISTER] \
153 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
155 char sh_additional_register_names[ADDREGNAMES_SIZE] \
156 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
157 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
159 /* Provide reg_class from a letter such as appears in the machine
160 description. *: target independently reserved letter.
161 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
163 enum reg_class reg_class_from_letter[] =
165 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
166 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
167 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
168 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
169 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
170 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
171 /* y */ FPUL_REGS, /* z */ R0_REGS
174 int assembler_dialect;
176 static void split_branches PARAMS ((rtx));
177 static int branch_dest PARAMS ((rtx));
178 static void force_into PARAMS ((rtx, rtx));
179 static void print_slot PARAMS ((rtx));
180 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
181 static void dump_table PARAMS ((rtx));
182 static int hi_const PARAMS ((rtx));
183 static int broken_move PARAMS ((rtx));
184 static int mova_p PARAMS ((rtx));
185 static rtx find_barrier PARAMS ((int, rtx, rtx));
186 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
187 static rtx gen_block_redirect PARAMS ((rtx, int, int));
188 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
189 static rtx frame_insn PARAMS ((rtx));
190 static rtx push PARAMS ((int));
191 static void pop PARAMS ((int));
192 static void push_regs PARAMS ((HOST_WIDE_INT *));
193 static void calc_live_regs PARAMS ((int *, HOST_WIDE_INT *));
194 static void mark_use PARAMS ((rtx, rtx *));
195 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
196 static rtx mark_constant_pool_use PARAMS ((rtx));
197 const struct attribute_spec sh_attribute_table[];
198 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
199 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
200 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
201 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
202 static void sh_insert_attributes PARAMS ((tree, tree *));
203 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
204 static int sh_use_dfa_interface PARAMS ((void));
205 static int sh_issue_rate PARAMS ((void));
206 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
208 static bool sh_cannot_modify_jumps_p PARAMS ((void));
209 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
211 static void sh_encode_section_info PARAMS ((tree, int));
212 static const char *sh_strip_name_encoding PARAMS ((const char *));
213 static void sh_init_builtins PARAMS ((void));
214 static void sh_media_init_builtins PARAMS ((void));
215 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
216 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
217 HOST_WIDE_INT, tree));
218 static int flow_dependent_p PARAMS ((rtx, rtx));
219 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
220 static int shiftcosts PARAMS ((rtx));
221 static int andcosts PARAMS ((rtx));
222 static int addsubcosts PARAMS ((rtx));
223 static int multcosts PARAMS ((rtx));
224 static bool unspec_caller_rtx_p PARAMS ((rtx));
225 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
226 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
227 static int sh_address_cost PARAMS ((rtx));
229 /* Initialize the GCC target structure. */
230 #undef TARGET_ATTRIBUTE_TABLE
231 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
233 /* The next two are used for debug info when compiling with -gdwarf. */
234 #undef TARGET_ASM_UNALIGNED_HI_OP
235 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
236 #undef TARGET_ASM_UNALIGNED_SI_OP
237 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
239 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
240 #undef TARGET_ASM_UNALIGNED_DI_OP
241 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
242 #undef TARGET_ASM_ALIGNED_DI_OP
243 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
245 #undef TARGET_ASM_FUNCTION_EPILOGUE
246 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
248 #undef TARGET_ASM_OUTPUT_MI_THUNK
249 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
251 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
252 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
254 #undef TARGET_INSERT_ATTRIBUTES
255 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
257 #undef TARGET_SCHED_ADJUST_COST
258 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
260 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
261 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
263 #undef TARGET_SCHED_ISSUE_RATE
264 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
266 #undef TARGET_CANNOT_MODIFY_JUMPS_P
267 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
269 #undef TARGET_MS_BITFIELD_LAYOUT_P
270 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
272 #undef TARGET_ENCODE_SECTION_INFO
273 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
274 #undef TARGET_STRIP_NAME_ENCODING
275 #define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding
277 #undef TARGET_INIT_BUILTINS
278 #define TARGET_INIT_BUILTINS sh_init_builtins
279 #undef TARGET_EXPAND_BUILTIN
280 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
282 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
283 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
285 #undef TARGET_CANNOT_COPY_INSN_P
286 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
287 #undef TARGET_RTX_COSTS
288 #define TARGET_RTX_COSTS sh_rtx_costs
289 #undef TARGET_ADDRESS_COST
290 #define TARGET_ADDRESS_COST sh_address_cost
293 #undef TARGET_HAVE_TLS
294 #define TARGET_HAVE_TLS true
297 struct gcc_target targetm = TARGET_INITIALIZER;
299 /* Print the operand address in x to the stream. */
302 print_operand_address (stream, x)
306 switch (GET_CODE (x))
310 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
315 rtx base = XEXP (x, 0);
316 rtx index = XEXP (x, 1);
318 switch (GET_CODE (index))
321 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
322 reg_names[true_regnum (base)]);
328 int base_num = true_regnum (base);
329 int index_num = true_regnum (index);
331 fprintf (stream, "@(r0,%s)",
332 reg_names[MAX (base_num, index_num)]);
344 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
348 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
352 x = mark_constant_pool_use (x);
353 output_addr_const (stream, x);
358 /* Print operand x (an rtx) in assembler syntax to file stream
359 according to modifier code.
361 '.' print a .s if insn needs delay slot
362 ',' print LOCAL_LABEL_PREFIX
363 '@' print trap, rte or rts depending upon pragma interruptness
364 '#' output a nop if there is nothing to put in the delay slot
365 ''' print likelyhood suffix (/u for unlikely).
366 'O' print a constant without the #
367 'R' print the LSW of a dp value - changes if in little endian
368 'S' print the MSW of a dp value - changes if in little endian
369 'T' print the next word of a dp value - same as 'R' in big endian mode.
370 'M' print an `x' if `m' will print `base,index'.
371 'N' print 'r63' if the operand is (const_int 0).
372 'm' print a pair `base,offset' or `base,index', for LD and ST.
373 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
374 'o' output an operator. */
377 print_operand (stream, x, code)
386 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
387 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
388 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
391 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
395 fprintf (stream, "trapa #%d", trap_exit);
396 else if (sh_cfun_interrupt_handler_p ())
397 fprintf (stream, "rte");
399 fprintf (stream, "rts");
402 /* Output a nop if there's nothing in the delay slot. */
403 if (dbr_sequence_length () == 0)
404 fprintf (stream, "\n\tnop");
408 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
410 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
411 fputs ("/u", stream);
415 x = mark_constant_pool_use (x);
416 output_addr_const (stream, x);
419 fputs (reg_names[REGNO (x) + LSW], (stream));
422 fputs (reg_names[REGNO (x) + MSW], (stream));
425 /* Next word of a double. */
426 switch (GET_CODE (x))
429 fputs (reg_names[REGNO (x) + 1], (stream));
432 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
433 && GET_CODE (XEXP (x, 0)) != POST_INC)
434 x = adjust_address (x, SImode, 4);
435 print_operand_address (stream, XEXP (x, 0));
442 switch (GET_CODE (x))
444 case PLUS: fputs ("add", stream); break;
445 case MINUS: fputs ("sub", stream); break;
446 case MULT: fputs ("mul", stream); break;
447 case DIV: fputs ("div", stream); break;
448 case EQ: fputs ("eq", stream); break;
449 case NE: fputs ("ne", stream); break;
450 case GT: case LT: fputs ("gt", stream); break;
451 case GE: case LE: fputs ("ge", stream); break;
452 case GTU: case LTU: fputs ("gtu", stream); break;
453 case GEU: case LEU: fputs ("geu", stream); break;
459 if (GET_CODE (x) == MEM
460 && GET_CODE (XEXP (x, 0)) == PLUS
461 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
462 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
467 if (GET_CODE (x) != MEM)
470 switch (GET_CODE (x))
474 print_operand (stream, x, 0);
475 fputs (", 0", stream);
479 print_operand (stream, XEXP (x, 0), 0);
480 fputs (", ", stream);
481 print_operand (stream, XEXP (x, 1), 0);
490 if (x == CONST0_RTX (GET_MODE (x)))
492 fprintf ((stream), "r63");
497 if (GET_CODE (x) == CONST_INT)
499 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
506 switch (GET_CODE (x))
508 /* FIXME: We need this on SHmedia32 because reload generates
509 some sign-extended HI or QI loads into DImode registers
510 but, because Pmode is SImode, the address ends up with a
511 subreg:SI of the DImode register. Maybe reload should be
512 fixed so as to apply alter_subreg to such loads? */
514 if (SUBREG_BYTE (x) != 0
515 || GET_CODE (SUBREG_REG (x)) != REG)
522 if (FP_REGISTER_P (REGNO (x))
523 && GET_MODE (x) == V16SFmode)
524 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
525 else if (FP_REGISTER_P (REGNO (x))
526 && GET_MODE (x) == V4SFmode)
527 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
528 else if (GET_CODE (x) == REG
529 && GET_MODE (x) == V2SFmode)
530 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
531 else if (FP_REGISTER_P (REGNO (x))
532 && GET_MODE_SIZE (GET_MODE (x)) > 4)
533 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
535 fputs (reg_names[REGNO (x)], (stream));
539 output_address (XEXP (x, 0));
544 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
545 && GET_MODE (XEXP (x, 0)) == DImode
546 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
547 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
549 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
552 if (GET_CODE (val) == ASHIFTRT)
555 if (GET_CODE (XEXP (val, 0)) == CONST)
557 output_addr_const (stream, XEXP (val, 0));
558 if (GET_CODE (XEXP (val, 0)) == CONST)
560 fputs (" >> ", stream);
561 output_addr_const (stream, XEXP (val, 1));
566 if (GET_CODE (val) == CONST)
568 output_addr_const (stream, val);
569 if (GET_CODE (val) == CONST)
572 fputs (" & 65535)", stream);
580 output_addr_const (stream, x);
587 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
589 force_into (value, target)
592 value = force_operand (value, target);
593 if (! rtx_equal_p (value, target))
594 emit_insn (gen_move_insn (target, value));
597 /* Emit code to perform a block move. Choose the best method.
599 OPERANDS[0] is the destination.
600 OPERANDS[1] is the source.
601 OPERANDS[2] is the size.
602 OPERANDS[3] is the alignment safe to use. */
605 expand_block_move (operands)
608 int align = INTVAL (operands[3]);
609 int constp = (GET_CODE (operands[2]) == CONST_INT);
610 int bytes = (constp ? INTVAL (operands[2]) : 0);
612 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
613 alignment, or if it isn't a multiple of 4 bytes, then fail. */
614 if (! constp || align < 4 || (bytes % 4 != 0))
621 else if (bytes == 12)
626 rtx r4 = gen_rtx (REG, SImode, 4);
627 rtx r5 = gen_rtx (REG, SImode, 5);
629 entry_name = get_identifier ("__movstrSI12_i4");
631 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
632 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
633 force_into (XEXP (operands[0], 0), r4);
634 force_into (XEXP (operands[1], 0), r5);
635 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
638 else if (! TARGET_SMALLCODE)
644 rtx r4 = gen_rtx (REG, SImode, 4);
645 rtx r5 = gen_rtx (REG, SImode, 5);
646 rtx r6 = gen_rtx (REG, SImode, 6);
648 entry_name = get_identifier (bytes & 4
650 : "__movstr_i4_even");
651 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
652 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
653 force_into (XEXP (operands[0], 0), r4);
654 force_into (XEXP (operands[1], 0), r5);
657 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
658 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
670 rtx r4 = gen_rtx_REG (SImode, 4);
671 rtx r5 = gen_rtx_REG (SImode, 5);
673 sprintf (entry, "__movstrSI%d", bytes);
674 entry_name = get_identifier (entry);
675 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
676 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
677 force_into (XEXP (operands[0], 0), r4);
678 force_into (XEXP (operands[1], 0), r5);
679 emit_insn (gen_block_move_real (func_addr_rtx));
683 /* This is the same number of bytes as a memcpy call, but to a different
684 less common function name, so this will occasionally use more space. */
685 if (! TARGET_SMALLCODE)
690 int final_switch, while_loop;
691 rtx r4 = gen_rtx_REG (SImode, 4);
692 rtx r5 = gen_rtx_REG (SImode, 5);
693 rtx r6 = gen_rtx_REG (SImode, 6);
695 entry_name = get_identifier ("__movstr");
696 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
697 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
698 force_into (XEXP (operands[0], 0), r4);
699 force_into (XEXP (operands[1], 0), r5);
701 /* r6 controls the size of the move. 16 is decremented from it
702 for each 64 bytes moved. Then the negative bit left over is used
703 as an index into a list of move instructions. e.g., a 72 byte move
704 would be set up with size(r6) = 14, for one iteration through the
705 big while loop, and a switch of -2 for the last part. */
707 final_switch = 16 - ((bytes / 4) % 16);
708 while_loop = ((bytes / 4) / 16 - 1) * 16;
709 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
710 emit_insn (gen_block_lump_real (func_addr_rtx));
717 /* Prepare operands for a move define_expand; specifically, one of the
718 operands must be in a register. */
721 prepare_move_operands (operands, mode)
723 enum machine_mode mode;
725 if ((mode == SImode || mode == DImode)
727 && ! ((mode == Pmode || mode == ptr_mode)
728 && tls_symbolic_operand (operands[1], Pmode) != 0))
731 if (SYMBOLIC_CONST_P (operands[1]))
733 if (GET_CODE (operands[0]) == MEM)
734 operands[1] = force_reg (Pmode, operands[1]);
735 else if (TARGET_SHMEDIA
736 && GET_CODE (operands[1]) == LABEL_REF
737 && target_reg_operand (operands[0], mode))
741 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
742 operands[1] = legitimize_pic_address (operands[1], mode, temp);
745 else if (GET_CODE (operands[1]) == CONST
746 && GET_CODE (XEXP (operands[1], 0)) == PLUS
747 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
749 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
750 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
752 operands[1] = expand_binop (mode, add_optab, temp,
753 XEXP (XEXP (operands[1], 0), 1),
754 no_new_pseudos ? temp
755 : gen_reg_rtx (Pmode),
760 if (! reload_in_progress && ! reload_completed)
762 /* Copy the source to a register if both operands aren't registers. */
763 if (! register_operand (operands[0], mode)
764 && ! sh_register_operand (operands[1], mode))
765 operands[1] = copy_to_mode_reg (mode, operands[1]);
767 /* This case can happen while generating code to move the result
768 of a library call to the target. Reject `st r0,@(rX,rY)' because
769 reload will fail to find a spill register for rX, since r0 is already
770 being used for the source. */
771 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
772 && GET_CODE (operands[0]) == MEM
773 && GET_CODE (XEXP (operands[0], 0)) == PLUS
774 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
775 operands[1] = copy_to_mode_reg (mode, operands[1]);
778 if (mode == Pmode || mode == ptr_mode)
781 enum tls_model tls_kind;
785 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
787 rtx tga_op1, tga_ret, tmp, tmp2;
792 case TLS_MODEL_GLOBAL_DYNAMIC:
793 tga_ret = gen_rtx_REG (Pmode, R0_REG);
794 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
798 case TLS_MODEL_LOCAL_DYNAMIC:
799 tga_ret = gen_rtx_REG (Pmode, R0_REG);
800 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
802 tmp = gen_reg_rtx (Pmode);
803 emit_move_insn (tmp, tga_ret);
805 if (register_operand (op0, Pmode))
808 tmp2 = gen_reg_rtx (Pmode);
810 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
814 case TLS_MODEL_INITIAL_EXEC:
816 emit_insn (gen_GOTaddr2picreg ());
817 tga_op1 = gen_reg_rtx (Pmode);
818 tmp = gen_sym2GOTTPOFF (op1);
819 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
823 case TLS_MODEL_LOCAL_EXEC:
824 tmp2 = gen_reg_rtx (Pmode);
825 emit_insn (gen_load_gbr (tmp2));
826 tmp = gen_reg_rtx (Pmode);
827 emit_insn (gen_symTPOFF2reg (tmp, op1));
828 RTX_UNCHANGING_P (tmp) = 1;
830 if (register_operand (op0, Pmode))
833 op1 = gen_reg_rtx (Pmode);
835 emit_insn (gen_addsi3 (op1, tmp, tmp2));
848 /* Prepare the operands for an scc instruction; make sure that the
849 compare has been done. */
851 prepare_scc_operands (code)
854 rtx t_reg = gen_rtx_REG (SImode, T_REG);
855 enum rtx_code oldcode = code;
856 enum machine_mode mode;
858 /* First need a compare insn. */
862 /* It isn't possible to handle this case. */
881 rtx tmp = sh_compare_op0;
882 sh_compare_op0 = sh_compare_op1;
883 sh_compare_op1 = tmp;
886 mode = GET_MODE (sh_compare_op0);
887 if (mode == VOIDmode)
888 mode = GET_MODE (sh_compare_op1);
890 sh_compare_op0 = force_reg (mode, sh_compare_op0);
891 if ((code != EQ && code != NE
892 && (sh_compare_op1 != const0_rtx
893 || code == GTU || code == GEU || code == LTU || code == LEU))
894 || (mode == DImode && sh_compare_op1 != const0_rtx)
895 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
896 sh_compare_op1 = force_reg (mode, sh_compare_op1);
898 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
899 (mode == SFmode ? emit_sf_insn : emit_df_insn)
900 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
901 gen_rtx (SET, VOIDmode, t_reg,
902 gen_rtx (code, SImode,
903 sh_compare_op0, sh_compare_op1)),
904 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
906 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
907 gen_rtx (code, SImode, sh_compare_op0,
913 /* Called from the md file, set up the operands of a compare instruction. */
916 from_compare (operands, code)
920 enum machine_mode mode = GET_MODE (sh_compare_op0);
922 if (mode == VOIDmode)
923 mode = GET_MODE (sh_compare_op1);
926 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
928 /* Force args into regs, since we can't use constants here. */
929 sh_compare_op0 = force_reg (mode, sh_compare_op0);
930 if (sh_compare_op1 != const0_rtx
931 || code == GTU || code == GEU
932 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
933 sh_compare_op1 = force_reg (mode, sh_compare_op1);
935 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
937 from_compare (operands, GT);
938 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
941 insn = gen_rtx_SET (VOIDmode,
942 gen_rtx_REG (SImode, T_REG),
943 gen_rtx (code, SImode, sh_compare_op0,
945 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
947 insn = gen_rtx (PARALLEL, VOIDmode,
949 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
950 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
956 /* Functions to output assembly code. */
958 /* Return a sequence of instructions to perform DI or DF move.
960 Since the SH cannot move a DI or DF in one instruction, we have
961 to take care when we see overlapping source and dest registers. */
964 output_movedouble (insn, operands, mode)
965 rtx insn ATTRIBUTE_UNUSED;
967 enum machine_mode mode;
969 rtx dst = operands[0];
970 rtx src = operands[1];
972 if (GET_CODE (dst) == MEM
973 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
974 return "mov.l %T1,%0\n\tmov.l %1,%0";
976 if (register_operand (dst, mode)
977 && register_operand (src, mode))
979 if (REGNO (src) == MACH_REG)
980 return "sts mach,%S0\n\tsts macl,%R0";
982 /* When mov.d r1,r2 do r2->r3 then r1->r2;
983 when mov.d r1,r0 do r1->r0 then r2->r1. */
985 if (REGNO (src) + 1 == REGNO (dst))
986 return "mov %T1,%T0\n\tmov %1,%0";
988 return "mov %1,%0\n\tmov %T1,%T0";
990 else if (GET_CODE (src) == CONST_INT)
992 if (INTVAL (src) < 0)
993 output_asm_insn ("mov #-1,%S0", operands);
995 output_asm_insn ("mov #0,%S0", operands);
999 else if (GET_CODE (src) == MEM)
1002 int dreg = REGNO (dst);
1003 rtx inside = XEXP (src, 0);
1005 if (GET_CODE (inside) == REG)
1006 ptrreg = REGNO (inside);
1007 else if (GET_CODE (inside) == SUBREG)
1008 ptrreg = subreg_regno (inside);
1009 else if (GET_CODE (inside) == PLUS)
1011 ptrreg = REGNO (XEXP (inside, 0));
1012 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1013 an offsettable address. Unfortunately, offsettable addresses use
1014 QImode to check the offset, and a QImode offsettable address
1015 requires r0 for the other operand, which is not currently
1016 supported, so we can't use the 'o' constraint.
1017 Thus we must check for and handle r0+REG addresses here.
1018 We punt for now, since this is likely very rare. */
1019 if (GET_CODE (XEXP (inside, 1)) == REG)
1022 else if (GET_CODE (inside) == LABEL_REF)
1023 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1024 else if (GET_CODE (inside) == POST_INC)
1025 return "mov.l %1,%0\n\tmov.l %1,%T0";
1029 /* Work out the safe way to copy. Copy into the second half first. */
1031 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1034 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1037 /* Print an instruction which would have gone into a delay slot after
1038 another instruction, but couldn't because the other instruction expanded
1039 into a sequence where putting the slot insn at the end wouldn't work. */
1045 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
1047 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1051 output_far_jump (insn, op)
1055 struct { rtx lab, reg, op; } this;
1056 rtx braf_base_lab = NULL_RTX;
1059 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1062 this.lab = gen_label_rtx ();
1066 && offset - get_attr_length (insn) <= 32766)
1069 jump = "mov.w %O0,%1; braf %1";
1077 jump = "mov.l %O0,%1; braf %1";
1079 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1082 jump = "mov.l %O0,%1; jmp @%1";
1084 /* If we have a scratch register available, use it. */
1085 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1086 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1088 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1089 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1090 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1091 output_asm_insn (jump, &this.lab);
1092 if (dbr_sequence_length ())
1093 print_slot (final_sequence);
1095 output_asm_insn ("nop", 0);
1099 /* Output the delay slot insn first if any. */
1100 if (dbr_sequence_length ())
1101 print_slot (final_sequence);
1103 this.reg = gen_rtx_REG (SImode, 13);
1104 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1105 Fortunately, MACL is fixed and call-clobbered, and we never
1106 need its value across jumps, so save r13 in it instead of in
1109 output_asm_insn ("lds r13, macl", 0);
1111 output_asm_insn ("mov.l r13,@-r15", 0);
1112 output_asm_insn (jump, &this.lab);
1114 output_asm_insn ("sts macl, r13", 0);
1116 output_asm_insn ("mov.l @r15+,r13", 0);
1118 if (far && flag_pic && TARGET_SH2)
1120 braf_base_lab = gen_label_rtx ();
1121 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1122 CODE_LABEL_NUMBER (braf_base_lab));
1125 output_asm_insn (".align 2", 0);
1126 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1128 if (far && flag_pic)
1131 this.lab = braf_base_lab;
1132 output_asm_insn (".long %O2-%O0", &this.lab);
1135 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1139 /* Local label counter, used for constants in the pool and inside
1140 pattern branches. */
1142 static int lf = 100;
1144 /* Output code for ordinary branches. */
/* Emit assembly for a conditional branch INSN.  LOGIC selects the sense
   (bt vs. bf).  Dispatches on get_attr_length (insn): short branches are
   returned as a template string, long ones are printed directly as an
   inverted short branch around an unconditional "bra".
   NOTE(review): several lines of this function (case labels, braces,
   declarations of `label' and `buffer') are not visible in this view.  */
1147 output_branch (logic, insn, operands)
1152 switch (get_attr_length (insn))
1155 /* This can happen if filling the delay slot has caused a forward
1156 branch to exceed its range (we could reverse it, but only
1157 when we know we won't overextend other branches; this should
1158 best be handled by relaxation).
1159 It can also happen when other condbranches hoist delay slot insn
1160 from their destination, thus leading to code size increase.
1161 But the branch will still be in the range -4092..+4098 bytes. */
1166 /* The call to print_slot will clobber the operands. */
1167 rtx op0 = operands[0];
1169 /* If the instruction in the delay slot is annulled (true), then
1170 there is no delay slot where we can put it now. The only safe
1171 place for it is after the label. final will do that by default. */
1174 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Emit the inverted branch with its delay slot filled.  */
1176 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1177 ASSEMBLER_DIALECT ? "/" : ".", label);
1178 print_slot (final_sequence);
1181 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
/* The unconditional jump to the real destination, then the local
   label the inverted branch targets.  */
1183 output_asm_insn ("bra\t%l0", &op0);
1184 fprintf (asm_out_file, "\tnop\n");
1185 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1189 /* When relaxing, handle this like a short branch. The linker
1190 will fix it up if it still doesn't fit after relaxation. */
1192 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1194 /* These are for SH2e, in which we have to account for the
1195 extra nop because of the hardware bug in annulled branches. */
1202 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1204 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1206 ASSEMBLER_DIALECT ? "/" : ".", label);
/* Extra nop works around the SH2e annulled-branch hardware bug
   mentioned above.  */
1207 fprintf (asm_out_file, "\tnop\n");
1208 output_asm_insn ("bra\t%l0", operands);
1209 fprintf (asm_out_file, "\tnop\n");
1210 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1214 /* When relaxing, fall through. */
1219 sprintf (buffer, "b%s%ss\t%%l0",
1221 ASSEMBLER_DIALECT ? "/" : ".");
1222 output_asm_insn (buffer, &operands[0]);
1227 /* There should be no longer branches now - that would
1228 indicate that something has destroyed the branches set
1229 up in machine_dependent_reorg. */
/* Output a branchy instruction using TEMPLATE, reusing an existing label
   of the following conditional jump as operand 9 when possible, otherwise
   creating a fresh label after INSN.  Returns the template string for
   final to print.  NOTE(review): some lines (return statements, braces)
   are missing from this view of the function.  */
1235 output_branchy_insn (code, template, insn, operands)
1237 const char *template;
1241 rtx next_insn = NEXT_INSN (insn);
1243 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1245 rtx src = SET_SRC (PATTERN (next_insn));
1246 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1248 /* Following branch not taken */
/* Place the label just past the following jump and record its
   address for branch shortening.  */
1249 operands[9] = gen_label_rtx ();
1250 emit_label_after (operands[9], next_insn);
1251 INSN_ADDRESSES_NEW (operands[9],
1252 INSN_ADDRESSES (INSN_UID (next_insn))
1253 + get_attr_length (next_insn));
1258 int offset = (branch_dest (next_insn)
1259 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
/* -252..258 is the reach of a short conditional branch here --
   NOTE(review): matches the bt/bf displacement range.  */
1260 if (offset >= -252 && offset <= 258)
1262 if (GET_CODE (src) == IF_THEN_ELSE)
1264 src = XEXP (src, 1);
/* No usable following jump: emit our own label after INSN.  */
1270 operands[9] = gen_label_rtx ();
1271 emit_label_after (operands[9], insn);
1272 INSN_ADDRESSES_NEW (operands[9],
1273 INSN_ADDRESSES (INSN_UID (insn))
1274 + get_attr_length (insn));
/* Output an IEEE-conformant floating-point equality comparison:
   branch over a second fcmp/eq when the first one already decided.  */
1279 output_ieee_ccmpeq (insn, operands)
1280 rtx insn, *operands;
1282 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1285 /* Output to FILE the start of the assembler file. */
1288 output_file_start (file)
1291 output_file_directive (file, main_input_filename);
1293 /* Switch to the data section so that the coffsem symbol
1294 isn't in the text section. */
/* Emit endianness and (for SH5) ISA-mode/ABI directives.  */
1297 if (TARGET_LITTLE_ENDIAN)
1298 fprintf (file, "\t.little\n");
1300 if (TARGET_SHCOMPACT)
1301 fprintf (file, "\t.mode\tSHcompact\n");
1302 else if (TARGET_SHMEDIA)
1303 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1304 TARGET_SHMEDIA64 ? 64 : 32);
1307 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
/* Recurses through CONST wrappers and the operands of binary rtxes;
   returns nonzero when an UNSPEC_CALLER is found.  */
1310 unspec_caller_rtx_p (pat)
1313 switch (GET_CODE (pat))
1316 return unspec_caller_rtx_p (XEXP (pat, 0));
/* For two-operand codes, check both sides.  */
1319 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1321 return unspec_caller_rtx_p (XEXP (pat, 1));
1323 if (XINT (pat, 1) == UNSPEC_CALLER)
1332 /* Indicate that INSN cannot be duplicated. This is true for insn
1333 that generates an unique label. */
/* Only relevant after reload with PIC: an insn whose SET_SRC contains
   UNSPEC_CALLER references a caller-unique label and must not be
   copied (e.g. by unrolling or cross-jumping).  */
1336 sh_cannot_copy_insn_p (insn)
1341 if (!reload_completed || !flag_pic)
1344 if (GET_CODE (insn) != INSN)
1346 if (asm_noperands (insn) >= 0)
1349 pat = PATTERN (insn);
1350 if (GET_CODE (pat) != SET)
1352 pat = SET_SRC (pat);
1354 if (unspec_caller_rtx_p (pat))
1360 /* Actual number of instructions used to make a shift by N. */
/* Index is the shift count (0..31); entries of 8 mark counts that are
   done via a library call / dynamic shift instead of inline.  */
1361 static const char ashiftrt_insns[] =
1362 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1364 /* Left shift and logical right shift are the same. */
1365 static const char shift_insns[] =
1366 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1368 /* Individual shift amounts needed to get the above length sequences.
1369 One bit right shifts clobber the T bit, so when possible, put one bit
1370 shifts in the middle of the sequence, so the ends are eligible for
1371 branch delay slots. */
/* Negative entries denote right shifts within a left-shift sequence
   (see gen_ashift, which interprets the sign).  */
1372 static const short shift_amounts[32][5] = {
1373 {0}, {1}, {2}, {2, 1},
1374 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1375 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1376 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1377 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1378 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1379 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1380 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1382 /* Likewise, but for shift amounts < 16, up to three highmost bits
1383 might be clobbered. This is typically used when combined with some
1384 kind of sign or zero extension. */
1386 static const char ext_shift_insns[] =
1387 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1389 static const short ext_shift_amounts[32][4] = {
1390 {0}, {1}, {2}, {2, 1},
1391 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1392 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1393 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1394 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1395 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1396 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1397 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1399 /* Assuming we have a value that has been sign-extended by at least one bit,
1400 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1401 to shift it by N without data loss, and quicker than by other means? */
/* True exactly for N == 7 and N == 15 ((n | 8) == 15).  */
1402 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1404 /* This is used in length attributes in sh.md to help compute the length
1405 of arbitrary constant shift instructions. */
/* Returns the inline instruction count for the constant shift in INSN's
   first parallel member, looked up in the tables above.  */
1408 shift_insns_rtx (insn)
1411 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1412 int shift_count = INTVAL (XEXP (set_src, 1));
1413 enum rtx_code shift_code = GET_CODE (set_src);
1418 return ashiftrt_insns[shift_count];
1421 return shift_insns[shift_count];
1427 /* Return the cost of a shift. */
/* Cost in instructions of shift rtx X.  Multi-word shifts are only
   cheap for the DImode shift-by-1 special case; variable shifts cost
   SH_DYNAMIC_SHIFT_COST; constant shifts use the lookup tables.  */
1438 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1440 if (GET_MODE (x) == DImode
1441 && GET_CODE (XEXP (x, 1)) == CONST_INT
1442 && INTVAL (XEXP (x, 1)) == 1)
1445 /* Everything else is invalid, because there is no pattern for it. */
1448 /* If shift by a non constant, then this will be expensive. */
1449 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1450 return SH_DYNAMIC_SHIFT_COST;
1452 value = INTVAL (XEXP (x, 1));
1454 /* Otherwise, return the true cost in instructions. */
1455 if (GET_CODE (x) == ASHIFTRT)
1457 int cost = ashiftrt_insns[value];
1458 /* If SH3, then we put the constant in a reg and use shad. */
1459 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1460 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1464 return shift_insns[value];
1467 /* Return the cost of an AND operation. */
/* Cost in instructions of AND rtx X, depending on whether the second
   operand is a register, an extu-able mask (0xff/0xffff), an immediate
   usable with "and #imm,r0", or needs a constant load.  */
1475 /* Anding with a register is a single cycle and instruction. */
1476 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1479 i = INTVAL (XEXP (x, 1));
/* SHmedia path -- NOTE(review): the surrounding TARGET_SHMEDIA test
   is not visible in this view.  */
1483 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1484 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1485 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1491 /* These constants are single cycle extu.[bw] instructions. */
1492 if (i == 0xff || i == 0xffff)
1494 /* Constants that can be used in an and immediate instruction is a single
1495 cycle, but this requires r0, so make it a little more expensive. */
1496 if (CONST_OK_FOR_L (i))
1498 /* Constants that can be loaded with a mov immediate and an and.
1499 This case is probably unnecessary. */
1500 if (CONST_OK_FOR_I (i))
1502 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1503 This case is probably unnecessary. */
1507 /* Return the cost of an addition or a subtraction. */
1513 /* Adding a register is a single cycle insn. */
1514 if (GET_CODE (XEXP (x, 1)) == REG
1515 || GET_CODE (XEXP (x, 1)) == SUBREG)
1518 /* Likewise for small constants. */
1519 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1520 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
/* SHmedia constant operands: cost scales with how many 16-bit
   chunks of the constant must be materialized.  */
1524 switch (GET_CODE (XEXP (x, 1)))
1529 return TARGET_SHMEDIA64 ? 5 : 3;
1532 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1534 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
/* Double shift avoids undefined behavior of a >> 32 on hosts where
   HOST_WIDE_INT is 32 bits.  */
1536 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1544 /* Any other constant requires a 2 cycle pc-relative load plus an
1549 /* Return the cost of a multiply. */
1552 rtx x ATTRIBUTE_UNUSED;
1559 /* We have a mul insn, so we can never take more than the mul and the
1560 read of the mac reg, but count more because of the latency and extra
1562 if (TARGET_SMALLCODE)
1567 /* If we're aiming at small code, then just count the number of
1568 insns in a multiply call sequence. */
1569 if (TARGET_SMALLCODE)
1572 /* Otherwise count all the insns in the routine we'd be calling too. */
1576 /* Compute a (partial) cost for rtx X. Return true if the complete
1577 cost has been computed, and false if subexpressions should be
1578 scanned. In either case, *TOTAL contains the cost result. */
/* Target hook: dispatches on CODE; constants are priced by how they can
   be materialized, arithmetic defers to the helper cost functions above.
   NOTE(review): the switch cases and return statements are partly
   missing from this view.  */
1581 sh_rtx_costs (x, code, outer_code, total)
1583 int code, outer_code, *total;
1590 if (INTVAL (x) == 0)
1592 else if (outer_code == AND && and_operand ((x), DImode))
1594 else if ((outer_code == IOR || outer_code == XOR
1595 || outer_code == PLUS)
1596 && CONST_OK_FOR_P (INTVAL (x)))
/* Cost grows with the number of 16-bit pieces needed to build
   the constant on SHmedia.  */
1598 else if (CONST_OK_FOR_J (INTVAL (x)))
1599 *total = COSTS_N_INSNS (outer_code != SET);
1600 else if (CONST_OK_FOR_J (INTVAL (x) >> 16))
1601 *total = COSTS_N_INSNS (2);
1602 else if (CONST_OK_FOR_J ((INTVAL (x) >> 16) >> 16))
1603 *total = COSTS_N_INSNS (3);
1605 *total = COSTS_N_INSNS (4);
1608 if (CONST_OK_FOR_I (INTVAL (x)))
1610 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1611 && CONST_OK_FOR_L (INTVAL (x)))
1620 if (TARGET_SHMEDIA64)
1621 *total = COSTS_N_INSNS (4);
1622 else if (TARGET_SHMEDIA32)
1623 *total = COSTS_N_INSNS (2);
1630 *total = COSTS_N_INSNS (4);
1636 *total = COSTS_N_INSNS (addsubcosts (x));
1640 *total = COSTS_N_INSNS (andcosts (x));
1644 *total = COSTS_N_INSNS (multcosts (x));
1650 *total = COSTS_N_INSNS (shiftcosts (x));
1657 *total = COSTS_N_INSNS (20);
1670 /* Compute the cost of an address. For the SH, all valid addresses are
1671 the same cost. Use a slightly higher cost for reg + reg addressing,
1672 since it increases pressure on r0. */
1678 return (GET_CODE (X) == PLUS
1679 && ! CONSTANT_P (XEXP (X, 1))
1680 && ! TARGET_SHMEDIA ? 1 : 0);
1683 /* Code to expand a shift. */
1686 gen_ashift (type, n, reg)
1691 /* Negative values here come from the shift_amounts array. */
1704 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1708 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1710 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1713 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1718 /* Same for HImode */
1721 gen_ashift_hi (type, n, reg)
1726 /* Negative values here come from the shift_amounts array. */
1740 /* We don't have HImode right shift operations because using the
1741 ordinary 32 bit shift instructions for that doesn't generate proper
1742 zero/sign extension.
1743 gen_ashift_hi is only called in contexts where we know that the
1744 sign extension works out correctly. */
/* Right shifts are delegated to the SImode gen_ashift on a SImode
   view of REG; paradoxical SUBREGs are peeled off first.  */
1747 if (GET_CODE (reg) == SUBREG)
1749 offset = SUBREG_BYTE (reg);
1750 reg = SUBREG_REG (reg);
1752 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1756 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1761 /* Output RTL to split a constant shift into its component SH constant
1762 shift instructions. */
/* operands[0] is the value being shifted (also the destination),
   operands[2] the constant count.  Special-cases shift-by-31 in both
   directions before falling back to the shift_amounts sequences.  */
1765 gen_shifty_op (code, operands)
1769 int value = INTVAL (operands[2]);
1772 /* Truncate the shift count in case it is out of bounds. */
1773 value = value & 0x1f;
/* Logical right shift by 31: rotate bit 31 into T, then movt.  */
1777 if (code == LSHIFTRT)
1779 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1780 emit_insn (gen_movt (operands[0]));
1783 else if (code == ASHIFT)
1785 /* There is a two instruction sequence for 31 bit left shifts,
1786 but it requires r0. */
1787 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1789 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1790 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1795 else if (value == 0)
1797 /* This can happen when not optimizing. We must output something here
1798 to prevent the compiler from aborting in final.c after the try_split
1800 emit_insn (gen_nop ());
/* General case: emit the table-driven sequence of partial shifts.  */
1804 max = shift_insns[value];
1805 for (i = 0; i < max; i++)
1806 gen_ashift (code, shift_amounts[value][i], operands[0]);
1809 /* Same as above, but optimized for values where the topmost bits don't
/* Like gen_shifty_op, but uses the ext_shift tables, which may clobber
   up to the three highmost bits; works for HImode or SImode operands.  */
1813 gen_shifty_hi_op (code, operands)
1817 int value = INTVAL (operands[2]);
1819 void (*gen_fun) PARAMS ((int, int, rtx));
1821 /* This operation is used by and_shl for SImode values with a few
1822 high bits known to be cleared. */
1826 emit_insn (gen_nop ());
1830 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1833 max = ext_shift_insns[value];
1834 for (i = 0; i < max; i++)
1835 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1838 /* When shifting right, emit the shifts in reverse order, so that
1839 solitary negative values come first. */
1840 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1841 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1844 /* Output RTL for an arithmetic right shift. */
1846 /* ??? Rewrite to use super-optimizer sequences. */
/* Expand ashrsi3.  Strategies in order: dynamic shad with a negated
   count (variable counts, or costly constant counts on SH3), the
   shift-by-31 pattern, 16..19 via a 16-bit shift plus singles, short
   inline sequences for counts <= 5, and otherwise a call to the
   __ashiftrt_r4_N library helper which shifts in r4.
   NOTE(review): some brace/return lines are missing from this view.  */
1849 expand_ashiftrt (operands)
1860 if (GET_CODE (operands[2]) != CONST_INT)
1862 rtx count = copy_to_mode_reg (SImode, operands[2]);
/* shad shifts right for negative counts, hence the negation.  */
1863 emit_insn (gen_negsi2 (count, count));
1864 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1867 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1868 > 1 + SH_DYNAMIC_SHIFT_COST)
1871 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1872 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count))
1876 if (GET_CODE (operands[2]) != CONST_INT)
1879 value = INTVAL (operands[2]) & 31;
1883 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1886 else if (value >= 16 && value <= 19)
1888 wrk = gen_reg_rtx (SImode);
1889 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1892 gen_ashift (ASHIFTRT, 1, wrk);
1893 emit_move_insn (operands[0], wrk);
1896 /* Expand a short sequence inline, longer call a magic routine. */
1897 else if (value <= 5)
1899 wrk = gen_reg_rtx (SImode);
1900 emit_move_insn (wrk, operands[1]);
1902 gen_ashift (ASHIFTRT, 1, wrk);
1903 emit_move_insn (operands[0], wrk);
1907 wrk = gen_reg_rtx (Pmode);
1909 /* Load the value into an arg reg and call a helper. */
1910 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1911 sprintf (func, "__ashiftrt_r4_%d", value);
1912 func_name = get_identifier (func);
1913 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1914 emit_move_insn (wrk, sym);
1915 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1916 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* Nonzero when a constant shift by COUNT is cheaper done dynamically
   (load count into a register, one shift insn) than inline.  */
1921 sh_dynamicalize_shift_p (count)
1924 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1927 /* Try to find a good way to implement the combiner pattern
1928 [(set (match_operand:SI 0 "register_operand" "r")
1929 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1930 (match_operand:SI 2 "const_int_operand" "n"))
1931 (match_operand:SI 3 "const_int_operand" "n"))) .
1932 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1933 return 0 for simple right / left or left/right shift combination.
1934 return 1 for a combination of shifts with zero_extend.
1935 return 2 for a combination of shifts with an AND that needs r0.
1936 return 3 for a combination of shifts with an AND that needs an extra
1937 scratch register, when the three highmost bits of the AND mask are clear.
1938 return 4 for a combination of shifts with an AND that needs an extra
1939 scratch register, when any of the three highmost bits of the AND mask
1941 If ATTRP is set, store an initial right shift width in ATTRP[0],
1942 and the instruction length in ATTRP[1] . These values are not valid
1944 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1945 shift_amounts for the last shift value that is to be used before the
1948 shl_and_kind (left_rtx, mask_rtx, attrp)
1949 rtx left_rtx, mask_rtx;
1952 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1953 int left = INTVAL (left_rtx), right;
1955 int cost, best_cost = 10000;
1956 int best_right = 0, best_len = 0;
1960 if (left < 0 || left > 31)
/* Work with the mask as seen before the left shift.  */
1962 if (GET_CODE (mask_rtx) == CONST_INT)
1963 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1965 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1966 /* Can this be expressed as a right shift / left shift pair ? */
/* lsb = lowest set bit of mask; mask2 = mask with the contiguous low
   run of ones removed; lsb2 = lowest set bit of mask2.  */
1967 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1968 right = exact_log2 (lsb);
1969 mask2 = ~(mask + lsb - 1);
1970 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1971 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1973 best_cost = shift_insns[right] + shift_insns[right + left];
1974 /* mask has no trailing zeroes <==> ! right */
1975 else if (! right && mask2 == ~(lsb2 - 1))
1977 int late_right = exact_log2 (lsb2);
1978 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1980 /* Try to use zero extend */
1981 if (mask2 == ~(lsb2 - 1))
1985 for (width = 8; width <= 16; width += 8)
1987 /* Can we zero-extend right away? */
1988 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1991 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1992 if (cost < best_cost)
2003 /* ??? Could try to put zero extend into initial right shift,
2004 or even shift a bit left before the right shift. */
2005 /* Determine value of first part of left shift, to get to the
2006 zero extend cut-off point. */
2007 first = width - exact_log2 (lsb2) + right;
2008 if (first >= 0 && right + left - first >= 0)
2010 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2011 + ext_shift_insns[right + left - first];
2012 if (cost < best_cost)
2024 /* Try to use r0 AND pattern */
2025 for (i = 0; i <= 2; i++)
2029 if (! CONST_OK_FOR_L (mask >> i))
2031 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2032 if (cost < best_cost)
2037 best_len = cost - 1;
2040 /* Try to use a scratch register to hold the AND operand. */
2041 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2042 for (i = 0; i <= 2; i++)
2046 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
2047 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2048 if (cost < best_cost)
2053 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
/* Report the winning strategy's parameters to the caller.  */
2059 attrp[0] = best_right;
2060 attrp[1] = best_len;
2065 /* This is used in length attributes of the unnamed instructions
2066 corresponding to shl_and_kind return values of 1 and 2. */
/* Extracts the shift count and mask from INSN's pattern, then asks
   shl_and_kind for the instruction length it computed.  */
2068 shl_and_length (insn)
2071 rtx set_src, left_rtx, mask_rtx;
2074 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2075 left_rtx = XEXP (XEXP (set_src, 0), 1);
2076 mask_rtx = XEXP (set_src, 1);
2077 shl_and_kind (left_rtx, mask_rtx, attributes);
2078 return attributes[1];
2081 /* This is used in length attribute of the and_shl_scratch instruction. */
/* Sums the lengths of the three component shifts plus the AND.  */
2084 shl_and_scr_length (insn)
2087 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2088 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2089 rtx op = XEXP (set_src, 0);
2090 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2091 op = XEXP (XEXP (op, 0), 0);
2092 return len + shift_insns[INTVAL (XEXP (op, 1))];
2095 /* Generating rtl? */
2096 extern int rtx_equal_function_value_matters;
2098 /* Generate rtl for instructions for which shl_and_kind advised a particular
2099 method of generating them, i.e. returned zero. */
/* Expand (DEST = (SOURCE << LEFT) & MASK) using the strategy chosen by
   shl_and_kind.  Returns nonzero on failure (caller falls back).
   NOTE(review): the switch/case structure and some returns are not
   visible in this view.  */
2102 gen_shl_and (dest, left_rtx, mask_rtx, source)
2103 rtx dest, left_rtx, mask_rtx, source;
2106 unsigned HOST_WIDE_INT mask;
2107 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2108 int right, total_shift;
2109 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2111 right = attributes[0];
2112 total_shift = INTVAL (left_rtx) + right;
2113 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2120 int first = attributes[2];
/* Zero-extend strategy: choose byte or word extend by mask width.  */
2125 emit_insn ((mask << right) <= 0xff
2126 ? gen_zero_extendqisi2(dest,
2127 gen_lowpart (QImode, source))
2128 : gen_zero_extendhisi2(dest,
2129 gen_lowpart (HImode, source)));
2133 emit_insn (gen_movsi (dest, source));
2137 operands[2] = GEN_INT (right);
2138 gen_shifty_hi_op (LSHIFTRT, operands);
2142 operands[2] = GEN_INT (first);
2143 gen_shifty_hi_op (ASHIFT, operands);
2144 total_shift -= first;
2148 emit_insn (mask <= 0xff
2149 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2150 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2151 if (total_shift > 0)
2153 operands[2] = GEN_INT (total_shift);
2154 gen_shifty_hi_op (ASHIFT, operands);
2159 shift_gen_fun = gen_shifty_op;
2161 /* If the topmost bit that matters is set, set the topmost bits
2162 that don't matter. This way, we might be able to get a shorter
2164 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2165 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2167 /* Don't expand fine-grained when combining, because that will
2168 make the pattern fail. */
2169 if (rtx_equal_function_value_matters
2170 || reload_in_progress || reload_completed)
2174 /* Cases 3 and 4 should be handled by this split
2175 only while combining */
2180 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2183 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2188 operands[2] = GEN_INT (total_shift);
2189 shift_gen_fun (ASHIFT, operands);
/* Scratch-register strategy: fold trailing right shifts of the
   ext sequence into the and_shl_scratch operands.  */
2196 if (kind != 4 && total_shift < 16)
2198 neg = -ext_shift_amounts[total_shift][1];
2200 neg -= ext_shift_amounts[total_shift][2];
2204 emit_insn (gen_and_shl_scratch (dest, source,
2207 GEN_INT (total_shift + neg),
2209 emit_insn (gen_movsi (dest, dest));
2216 /* Try to find a good way to implement the combiner pattern
2217 [(set (match_operand:SI 0 "register_operand" "=r")
2218 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2219 (match_operand:SI 2 "const_int_operand" "n")
2220 (match_operand:SI 3 "const_int_operand" "n")
2222 (clobber (reg:SI T_REG))]
2223 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2224 return 0 for simple left / right shift combination.
2225 return 1 for left shift / 8 bit sign extend / left shift.
2226 return 2 for left shift / 16 bit sign extend / left shift.
2227 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2228 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2229 return 5 for left shift / 16 bit sign extend / right shift
2230 return 6 for < 8 bit sign extend / left shift.
2231 return 7 for < 8 bit sign extend / left shift / single right shift.
2232 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2235 shl_sext_kind (left_rtx, size_rtx, costp)
2236 rtx left_rtx, size_rtx;
2239 int left, size, insize, ext;
2240 int cost = 0, best_cost;
2243 left = INTVAL (left_rtx);
2244 size = INTVAL (size_rtx);
/* INSIZE is the width of the significant low-order field before the
   left shift.  */
2245 insize = size - left;
2248 /* Default to left / right shift. */
2250 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2253 /* 16 bit shift / sign extend / 16 bit shift */
2254 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2255 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2256 below, by alternative 3 or something even better. */
2257 if (cost < best_cost)
2263 /* Try a plain sign extend between two shifts. */
2264 for (ext = 16; ext >= insize; ext -= 8)
2268 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2269 if (cost < best_cost)
2271 kind = ext / (unsigned) 8;
2275 /* Check if we can do a sloppy shift with a final signed shift
2276 restoring the sign. */
2277 if (EXT_SHIFT_SIGNED (size - ext))
2278 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2279 /* If not, maybe it's still cheaper to do the second shift sloppy,
2280 and do a final sign extend? */
2281 else if (size <= 16)
2282 cost = ext_shift_insns[ext - insize] + 1
2283 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2286 if (cost < best_cost)
2288 kind = ext / (unsigned) 8 + 2;
2292 /* Check if we can sign extend in r0 */
2295 cost = 3 + shift_insns[left];
2296 if (cost < best_cost)
2301 /* Try the same with a final signed shift. */
2304 cost = 3 + ext_shift_insns[left + 1] + 1;
2305 if (cost < best_cost)
2314 /* Try to use a dynamic shift. */
2315 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2316 if (cost < best_cost)
2327 /* Function to be used in the length attribute of the instructions
2328 implementing this pattern. */
/* Recomputes the strategy via shl_sext_kind and returns its cost.  */
2331 shl_sext_length (insn)
2334 rtx set_src, left_rtx, size_rtx;
2337 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2338 left_rtx = XEXP (XEXP (set_src, 0), 1);
2339 size_rtx = XEXP (set_src, 1);
2340 shl_sext_kind (left_rtx, size_rtx, &cost);
2344 /* Generate rtl for this pattern */
/* Expand (DEST = sign_extract ((SOURCE << LEFT), SIZE)) using the kind
   chosen by shl_sext_kind.  Returns nonzero on failure.
   NOTE(review): switch/case structure and some returns are missing
   from this view.  */
2347 gen_shl_sext (dest, left_rtx, size_rtx, source)
2348 rtx dest, left_rtx, size_rtx, source;
2351 int left, size, insize, cost;
2354 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2355 left = INTVAL (left_rtx);
2356 size = INTVAL (size_rtx);
2357 insize = size - left;
/* Kinds 1..4: left shift to the extend boundary, sign extend, then a
   second shift; odd kinds use 8-bit, even use 16-bit extension.  */
2365 int ext = kind & 1 ? 8 : 16;
2366 int shift2 = size - ext;
2368 /* Don't expand fine-grained when combining, because that will
2369 make the pattern fail. */
2370 if (! rtx_equal_function_value_matters
2371 && ! reload_in_progress && ! reload_completed)
2373 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2374 emit_insn (gen_movsi (dest, source));
2378 emit_insn (gen_movsi (dest, source));
2382 operands[2] = GEN_INT (ext - insize);
2383 gen_shifty_hi_op (ASHIFT, operands);
2386 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2387 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2392 operands[2] = GEN_INT (shift2);
2393 gen_shifty_op (ASHIFT, operands);
/* Over-shift by one and correct with a single arithmetic right
   shift when the sign survives (EXT_SHIFT_SIGNED).  */
2400 if (EXT_SHIFT_SIGNED (shift2))
2402 operands[2] = GEN_INT (shift2 + 1);
2403 gen_shifty_op (ASHIFT, operands);
2404 operands[2] = GEN_INT (1);
2405 gen_shifty_op (ASHIFTRT, operands);
2408 operands[2] = GEN_INT (shift2);
2409 gen_shifty_hi_op (ASHIFT, operands);
2413 operands[2] = GEN_INT (-shift2);
2414 gen_shifty_hi_op (LSHIFTRT, operands);
2416 emit_insn (size <= 8
2417 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2418 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2425 if (! rtx_equal_function_value_matters
2426 && ! reload_in_progress && ! reload_completed)
2427 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2431 operands[2] = GEN_INT (16 - insize);
2432 gen_shifty_hi_op (ASHIFT, operands);
2433 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2435 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2437 gen_ashift (ASHIFTRT, 1, dest);
2442 /* Don't expand fine-grained when combining, because that will
2443 make the pattern fail. */
2444 if (! rtx_equal_function_value_matters
2445 && ! reload_in_progress && ! reload_completed)
2447 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2448 emit_insn (gen_movsi (dest, source));
/* Kinds 6/7: sign extend a narrow field arithmetically --
   mask, flip the sign bit with xor, then subtract it back.  */
2451 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2452 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2453 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2455 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2456 gen_shifty_op (ASHIFT, operands);
2458 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2466 /* Prefix a symbol_ref name with "datalabel". */
/* LABEL_REFs are wrapped in a datalabel UNSPEC; SYMBOL_REFs get the
   SH_DATALABEL_ENCODING prefix spliced into their name.  */
2469 gen_datalabel_ref (sym)
2472 if (GET_CODE (sym) == LABEL_REF)
2473 return gen_rtx_CONST (GET_MODE (sym),
2474 gen_rtx_UNSPEC (GET_MODE (sym),
2478 if (GET_CODE (sym) != SYMBOL_REF)
2481 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2487 /* The SH cannot load a large constant into a register, constants have to
2488 come from a pc relative load. The reference of a pc relative load
2489 instruction must be less than 1k in front of the instruction. This
2490 means that we often have to dump a constant inside a function, and
2491 generate code to branch around it.
2493 It is important to minimize this, since the branches will slow things
2494 down and make things bigger.
2496 Worst case code looks like:
2514 We fix this by performing a scan before scheduling, which notices which
2515 instructions need to have their operands fetched from the constant table
2516 and builds the table.
2520 scan, find an instruction which needs a pcrel move. Look forward, find the
2521 last barrier which is within MAX_COUNT bytes of the requirement.
2522 If there isn't one, make one. Process all the instructions between
2523 the find and the barrier.
2525 In the above example, we can tell that L3 is within 1k of L1, so
2526 the first move can be shrunk from the 3 insn+constant sequence into
2527 just 1 insn, and the constant moved to L3 to make:
2538 Then the second move becomes the target for the shortening process. */
/* One entry in the pending constant pool.  */
2542 rtx value; /* Value in table. */
2543 rtx label; /* Label of value. */
2544 rtx wend; /* End of window. */
2545 enum machine_mode mode; /* Mode of value. */
2547 /* True if this constant is accessed as part of a post-increment
2548 sequence. Note that HImode constants are never accessed in this way. */
2549 bool part_of_sequence_p;
2552 /* The maximum number of constants that can fit into one pool, since
2553 the pc relative range is 0...1020 bytes and constants are at least 4
2556 #define MAX_POOL_SIZE (1020/4)
2557 static pool_node pool_vector[MAX_POOL_SIZE];
2558 static int pool_size;
/* Label of the most recent pool window, and index of its entry;
   used to chain consttable_window_end markers.  */
2559 static rtx pool_window_label;
2560 static int pool_window_last;
2562 /* ??? If we need a constant in HImode which is the truncated value of a
2563 constant we need in SImode, we could combine the two entries thus saving
2564 two bytes. Is this common enough to be worth the effort of implementing
2567 /* ??? This stuff should be done at the same time that we shorten branches.
2568 As it is now, we must assume that all branches are the maximum size, and
2569 this causes us to almost always output constant pools sooner than
2572 /* Add a constant to the pool and return its label. */
/* X is the constant, MODE its mode; LAST_VALUE, if set, is the value of
   the immediately preceding pool entry, used to detect constants that
   belong to one post-increment access sequence.  Reuses an existing
   entry when an rtx-equal one of the same mode is already pooled.  */
2575 add_constant (x, mode, last_value)
2577 enum machine_mode mode;
2581 rtx lab, new, ref, newref;
2583 /* First see if we've already got it. */
2584 for (i = 0; i < pool_size; i++)
2586 if (x->code == pool_vector[i].value->code
2587 && mode == pool_vector[i].mode)
2589 if (x->code == CODE_LABEL)
/* CODE_LABELs only match if their label numbers agree.  */
2591 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2594 if (rtx_equal_p (x, pool_vector[i].value))
2599 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* Give the reused entry an extra label for this reference.  */
2601 new = gen_label_rtx ();
2602 LABEL_REFS (new) = pool_vector[i].label;
2603 pool_vector[i].label = lab = new;
2605 if (lab && pool_window_label)
2607 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2608 ref = pool_vector[pool_window_last].wend;
2609 LABEL_NEXTREF (newref) = ref;
2610 pool_vector[pool_window_last].wend = newref;
2613 pool_window_label = new;
2614 pool_window_last = i;
2620 /* Need a new one. */
2621 pool_vector[pool_size].value = x;
2622 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
/* Continuation of a post-increment sequence: share the previous
   entry's label and mark both as part of the sequence.  */
2625 pool_vector[pool_size - 1].part_of_sequence_p = true;
2628 lab = gen_label_rtx ();
2629 pool_vector[pool_size].mode = mode;
2630 pool_vector[pool_size].label = lab;
2631 pool_vector[pool_size].wend = NULL_RTX;
2632 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2633 if (lab && pool_window_label)
2635 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2636 ref = pool_vector[pool_window_last].wend;
2637 LABEL_NEXTREF (newref) = ref;
2638 pool_vector[pool_window_last].wend = newref;
2641 pool_window_label = lab;
2642 pool_window_last = pool_size;
2647 /* Output the literal table. */
/* NOTE(review): this chunk is a lossy extract -- the embedded original line
   numbers jump (e.g. 2647 -> 2658), so the function header (presumably
   dump_table), braces and several statements are elided.  Code left
   byte-identical; comments only.  What is visible emits the accumulated
   pool_vector[] constants after SCAN: HImode entries first, then 4- and
   8-byte entries with alignment directives, closing with consttable_end
   and a barrier, and finally resets the pool-window state.  */
2658 /* Do two passes, first time dump out the HI sized constants. */
2660 for (i = 0; i < pool_size; i++)
2662 pool_node *p = &pool_vector[i];
2664 if (p->mode == HImode)
2668 scan = emit_insn_after (gen_align_2 (), scan);
2671 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2672 scan = emit_label_after (lab, scan);
2673 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2675 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2677 lab = XEXP (ref, 0);
2678 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2681 else if (p->mode == DFmode)
/* Second pass (lines elided between the loops): dump the word-sized and
   double-word constants.  With TARGET_FMOVD && TARGET_ALIGN_DOUBLE,
   8-byte constants are aligned to 2^3 and a pending SI constant may be
   moved before the alignment insn (align_insn bookkeeping below).  */
2687 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2689 rtx align_insn = NULL_RTX;
2691 scan = emit_label_after (gen_label_rtx (), scan);
2692 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2695 for (i = 0; i < pool_size; i++)
2697 pool_node *p = &pool_vector[i];
2705 if (align_insn && !p->part_of_sequence_p)
2707 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2708 emit_label_before (lab, align_insn);
2709 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2711 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2713 lab = XEXP (ref, 0);
2714 emit_insn_before (gen_consttable_window_end (lab),
2717 delete_insn (align_insn);
2718 align_insn = NULL_RTX;
2723 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2724 scan = emit_label_after (lab, scan);
2725 scan = emit_insn_after (gen_consttable_4 (p->value,
2727 need_align = ! need_align;
2733 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2738 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2739 scan = emit_label_after (lab, scan);
2740 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2748 if (p->mode != HImode)
2750 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2752 lab = XEXP (ref, 0);
2753 scan = emit_insn_after (gen_consttable_window_end (lab),
/* Fallback path (condition elided): no double alignment requested. */
2762 for (i = 0; i < pool_size; i++)
2764 pool_node *p = &pool_vector[i];
2775 scan = emit_label_after (gen_label_rtx (), scan);
2776 scan = emit_insn_after (gen_align_4 (), scan);
2778 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2779 scan = emit_label_after (lab, scan);
2780 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2788 scan = emit_label_after (gen_label_rtx (), scan);
2789 scan = emit_insn_after (gen_align_4 (), scan);
2791 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2792 scan = emit_label_after (lab, scan);
2793 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2801 if (p->mode != HImode)
2803 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2805 lab = XEXP (ref, 0);
2806 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Close the table and reset the window state used by add_constant.  */
2811 scan = emit_insn_after (gen_consttable_end (), scan);
2812 scan = emit_barrier_after (scan);
2814 pool_window_label = NULL_RTX;
2815 pool_window_last = 0;
2818 /* Return nonzero if constant would be an ok source for a
2819 mov.w instead of a mov.l. */
/* NOTE(review): the function header (presumably hi_const (src)) is elided
   from this extract.  The visible predicate accepts any CONST_INT that
   fits in a signed 16-bit range, i.e. loadable with a pc-relative mov.w. */
2825 return (GET_CODE (src) == CONST_INT
2826 && INTVAL (src) >= -32768
2827 && INTVAL (src) <= 32767);
2830 /* Nonzero if the insn is a move instruction which needs to be fixed. */
/* NOTE(review): lossy extract -- the function header (broken_move) and
   several condition lines (e.g. original 2854, 2867) are elided, so the
   boolean below is incomplete as shown.  Code left byte-identical.
   The visible logic: an INSN whose (possibly PARALLEL-wrapped) SET loads
   a constant into a non-QImode destination needs to be converted to a
   pc-relative load, unless it is a small CONST_INT covered by
   CONST_OK_FOR_I or an fldi0/fldi1-able FP constant.  */
2832 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2833 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2834 need to fix it if the input value is CONST_OK_FOR_I. */
2840 if (GET_CODE (insn) == INSN)
2842 rtx pat = PATTERN (insn);
2843 if (GET_CODE (pat) == PARALLEL)
2844 pat = XVECEXP (pat, 0, 0);
2845 if (GET_CODE (pat) == SET
2846 /* We can load any 8 bit value if we don't care what the high
2847 order bits end up as. */
2848 && GET_MODE (SET_DEST (pat)) != QImode
2849 && (CONSTANT_P (SET_SRC (pat))
2850 /* Match mova_const. */
2851 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2852 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2853 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2855 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2856 && (fp_zero_operand (SET_SRC (pat))
2857 || fp_one_operand (SET_SRC (pat)))
2858 /* ??? If this is a -m4 or -m4-single compilation, in general
2859 we don't know the current setting of fpscr, so disable fldi.
2860 There is an exception if this was a register-register move
2861 before reload - and hence it was ascertained that we have
2862 single precision setting - and in a post-reload optimization
2863 we changed this to do a constant load. In that case
2864 we don't have an r0 clobber, hence we must use fldi. */
2865 && (! TARGET_SH4 || TARGET_FMOVD
2866 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2868 && GET_CODE (SET_DEST (pat)) == REG
2869 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2870 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2871 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* NOTE(review): function header (presumably mova_p (insn)) elided from
   this extract.  The visible test recognizes a genuine `mova` insn: a SET
   whose source is (unspec [label_ref] UNSPEC_MOVA); a mova_const (UNSPEC
   over a CONST) is deliberately excluded.  */
2882 return (GET_CODE (insn) == INSN
2883 && GET_CODE (PATTERN (insn)) == SET
2884 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2885 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2886 /* Don't match mova_const. */
2887 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2890 /* Find the last barrier from insn FROM which is close enough to hold the
2891 constant pool. If we can't find one, then create one near the end of
/* NOTE(review): lossy extract -- declarations, braces and several
   statements of find_barrier are elided (original line numbers jump),
   so the control flow below is incomplete as shown.  Code left
   byte-identical; comments only.
   Contract (from what is visible): scan forward from FROM, tracking how
   far the pending HImode/SImode pool entries may drift (count_hi /
   count_si vs. hi_limit / si_limit, adjusted for alignment), and return
   a BARRIER before the ranges are exceeded, creating a jump-around +
   barrier + label near the scan point when none exists.  NUM_MOVA/MOVA
   handle the pending-mova case, including demoting an out-of-range mova
   to a plain pcload and retrying.  */
2895 find_barrier (num_mova, mova, from)
2906 int leading_mova = num_mova;
2907 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2911 /* For HImode: range is 510, add 4 because pc counts from address of
2912 second instruction after this one, subtract 2 for the jump instruction
2913 that we may need to emit before the table, subtract 2 for the instruction
2914 that fills the jump delay slot (in very rare cases, reorg will take an
2915 instruction from after the constant pool or will leave the delay slot
2916 empty). This gives 510.
2917 For SImode: range is 1020, add 4 because pc counts from address of
2918 second instruction after this one, subtract 2 in case pc is 2 byte
2919 aligned, subtract 2 for the jump instruction that we may need to emit
2920 before the table, subtract 2 for the instruction that fills the jump
2921 delay slot. This gives 1018. */
2923 /* The branch will always be shortened now that the reference address for
2924 forward branches is the successor address, thus we need no longer make
2925 adjustments to the [sh]i_limit for -O0. */
2930 while (from && count_si < si_limit && count_hi < hi_limit)
2932 int inc = get_attr_length (from);
2935 if (GET_CODE (from) == CODE_LABEL)
2938 new_align = 1 << label_to_alignment (from);
2939 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2940 new_align = 1 << barrier_align (from);
2946 if (GET_CODE (from) == BARRIER)
2949 found_barrier = from;
2951 /* If we are at the end of the function, or in front of an alignment
2952 instruction, we need not insert an extra alignment. We prefer
2953 this kind of barrier. */
2954 if (barrier_align (from) > 2)
2955 good_barrier = from;
2958 if (broken_move (from))
2961 enum machine_mode mode;
2963 pat = PATTERN (from);
2964 if (GET_CODE (pat) == PARALLEL)
2965 pat = XVECEXP (pat, 0, 0);
2966 src = SET_SRC (pat);
2967 dst = SET_DEST (pat);
2968 mode = GET_MODE (dst);
2970 /* We must explicitly check the mode, because sometimes the
2971 front end will generate code to load unsigned constants into
2972 HImode targets without properly sign extending them. */
2974 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2977 /* We put the short constants before the long constants, so
2978 we must count the length of short constants in the range
2979 for the long constants. */
2980 /* ??? This isn't optimal, but is easy to do. */
2985 /* We dump DF/DI constants before SF/SI ones, because
2986 the limit is the same, but the alignment requirements
2987 are higher. We may waste up to 4 additional bytes
2988 for alignment, and the DF/DI constant may have
2989 another SF/SI constant placed before it. */
2990 if (TARGET_SHCOMPACT
2992 && (mode == DFmode || mode == DImode))
2997 while (si_align > 2 && found_si + si_align - 2 > count_si)
2999 if (found_si > count_si)
3000 count_si = found_si;
3001 found_si += GET_MODE_SIZE (mode);
3003 si_limit -= GET_MODE_SIZE (mode);
3006 /* See the code in machine_dependent_reorg, which has a similar if
3007 statement that generates a new mova insn in many cases. */
3008 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3018 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3020 if (found_si > count_si)
3021 count_si = found_si;
3023 else if (GET_CODE (from) == JUMP_INSN
3024 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3025 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3029 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3031 /* We have just passed the barrier in front of the
3032 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3033 the ADDR_DIFF_VEC is accessed as data, just like our pool
3034 constants, this is a good opportunity to accommodate what
3035 we have gathered so far.
3036 If we waited any longer, we could end up at a barrier in
3037 front of code, which gives worse cache usage for separated
3038 instruction / data caches. */
3039 good_barrier = found_barrier;
3044 rtx body = PATTERN (from);
3045 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3048 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3049 else if (GET_CODE (from) == JUMP_INSN
3051 && ! TARGET_SMALLCODE)
/* Account for a raised alignment: shrink the usable range and round the
   running byte counts up to the new alignment.  */
3057 if (new_align > si_align)
3059 si_limit -= (count_si - 1) & (new_align - si_align);
3060 si_align = new_align;
3062 count_si = (count_si + new_align - 1) & -new_align;
3067 if (new_align > hi_align)
3069 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3070 hi_align = new_align;
3072 count_hi = (count_hi + new_align - 1) & -new_align;
3074 from = NEXT_INSN (from);
3081 /* Try as we might, the leading mova is out of range. Change
3082 it into a load (which will become a pcload) and retry. */
3083 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3084 INSN_CODE (mova) = -1;
3085 return find_barrier (0, 0, mova);
3089 /* Insert the constant pool table before the mova instruction,
3090 to prevent the mova label reference from going out of range. */
3092 good_barrier = found_barrier = barrier_before_mova;
3098 if (good_barrier && next_real_insn (found_barrier))
3099 found_barrier = good_barrier;
3103 /* We didn't find a barrier in time to dump our stuff,
3104 so we'll make one. */
3105 rtx label = gen_label_rtx ();
3107 /* If we exceeded the range, then we must back up over the last
3108 instruction we looked at. Otherwise, we just need to undo the
3109 NEXT_INSN at the end of the loop. */
3110 if (count_hi > hi_limit || count_si > si_limit)
3111 from = PREV_INSN (PREV_INSN (from));
3113 from = PREV_INSN (from);
3115 /* Walk back to be just before any jump or label.
3116 Putting it before a label reduces the number of times the branch
3117 around the constant pool table will be hit. Putting it before
3118 a jump makes it more likely that the bra delay slot will be
3120 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3121 || GET_CODE (from) == CODE_LABEL)
3122 from = PREV_INSN (from);
/* Synthesize the barrier: jump around the future table, then the
   barrier, then the label the jump targets.  */
3124 from = emit_jump_insn_after (gen_jump (label), from);
3125 JUMP_LABEL (from) = label;
3126 LABEL_NUSES (label) = 1;
3127 found_barrier = emit_barrier_after (from);
3128 emit_label_after (label, found_barrier);
3131 return found_barrier;
3134 /* If the instruction INSN is implemented by a special function, and we can
3135 positively find the register that is used to call the sfunc, and this
3136 register is not used anywhere else in this instruction - except as the
3137 destination of a set, return this register; else, return 0. */
/* NOTE(review): lossy extract -- return type, K&R parameter declaration,
   braces and the early `return 0;` statements are elided.  Code left
   byte-identical.  Visible logic: require a PARALLEL TYPE_SFUNC insn,
   find the single SImode USE holding the call register, then verify that
   register appears nowhere else (except as a SET destination/CLOBBER).  */
3139 sfunc_uses_reg (insn)
3143 rtx pattern, part, reg_part, reg;
3145 if (GET_CODE (insn) != INSN)
3147 pattern = PATTERN (insn);
3148 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3151 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3153 part = XVECEXP (pattern, 0, i);
3154 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3159 reg = XEXP (reg_part, 0);
3160 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3162 part = XVECEXP (pattern, 0, i);
3163 if (part == reg_part || GET_CODE (part) == CLOBBER)
3165 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3166 && GET_CODE (SET_DEST (part)) == REG)
3167 ? SET_SRC (part) : part)))
3173 /* See if the only way in which INSN uses REG is by calling it, or by
3174 setting it while calling it. Set *SET to a SET rtx if the register
/* NOTE(review): lossy extract -- parts of the header comment, parameter
   declarations, braces and several return statements are elided.  Code
   left byte-identical.  Visible logic: sfunc calls and plain SETs of REG
   are accepted; for a CALL_INSN, the call target must be (mem REG) and
   REG must not be mentioned anywhere else in the pattern.  */
3178 noncall_uses_reg (reg, insn, set)
3187 reg2 = sfunc_uses_reg (insn);
3188 if (reg2 && REGNO (reg2) == REGNO (reg))
3190 pattern = single_set (insn);
3192 && GET_CODE (SET_DEST (pattern)) == REG
3193 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3197 if (GET_CODE (insn) != CALL_INSN)
3199 /* We don't use rtx_equal_p because we don't care if the mode is
3201 pattern = single_set (insn);
3203 && GET_CODE (SET_DEST (pattern)) == REG
3204 && REGNO (reg) == REGNO (SET_DEST (pattern)))
/* CALL_INSN path: scan any PARALLEL siblings for stray uses of REG.  */
3210 par = PATTERN (insn);
3211 if (GET_CODE (par) == PARALLEL)
3212 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3214 part = XVECEXP (par, 0, i);
3215 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3218 return reg_mentioned_p (reg, SET_SRC (pattern));
3224 pattern = PATTERN (insn);
3226 if (GET_CODE (pattern) == PARALLEL)
3230 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3231 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3233 pattern = XVECEXP (pattern, 0, 0);
3236 if (GET_CODE (pattern) == SET)
3238 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3240 /* We don't use rtx_equal_p, because we don't care if the
3241 mode is different. */
3242 if (GET_CODE (SET_DEST (pattern)) != REG
3243 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3249 pattern = SET_SRC (pattern);
3252 if (GET_CODE (pattern) != CALL
3253 || GET_CODE (XEXP (pattern, 0)) != MEM
3254 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3260 /* Given X, a pattern of an insn or a part of it, return a mask of used
3261 general registers. Bits 0..15 mean that the respective registers
3262 are used as inputs in the instruction. Bits 16..31 mean that the
3263 registers 0..15, respectively, are used as outputs, or are clobbered.
3264 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
/* NOTE(review): lossy extract -- the switch statement skeleton, several
   case labels and some return statements are elided between the visible
   lines.  Code left byte-identical.  The recursion over GET_RTX_FORMAT
   at the bottom is the generic fallback for rtx codes not special-cased
   above it.  */
3266 regs_used (x, is_dest)
3275 code = GET_CODE (x);
3280 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3281 << (REGNO (x) + is_dest));
3285 rtx y = SUBREG_REG (x);
3287 if (GET_CODE (y) != REG)
3290 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3292 subreg_regno_offset (REGNO (y),
3295 GET_MODE (x)) + is_dest));
3299 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3301 /* If there was a return value, it must have been indicated with USE. */
3316 fmt = GET_RTX_FORMAT (code);
3318 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3323 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3324 used |= regs_used (XVECEXP (x, i, j), is_dest);
3326 else if (fmt[i] == 'e')
3327 used |= regs_used (XEXP (x, i), is_dest);
3332 /* Create an instruction that prevents redirection of a conditional branch
3333 to the destination of the JUMP with address ADDR.
3334 If the branch needs to be implemented as an indirect jump, try to find
3335 a scratch register for it.
3336 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3337 If any preceding insn that doesn't fit into a delay slot is good enough,
3338 pass 1. Pass 2 if a definite blocking insn is needed.
3339 -1 is used internally to avoid deep recursion.
3340 If a blocking instruction is made or recognized, return it. */
/* NOTE(review): lossy extract -- declarations, braces and some range
   checks (e.g. the `> 4092 + 4598' comparison tails) are elided.  Code
   left byte-identical; comments only.  */
3343 gen_block_redirect (jump, addr, need_block)
3345 int addr, need_block;
3348 rtx prev = prev_nonnote_insn (jump);
3351 /* First, check if we already have an instruction that satisfies our need. */
3352 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3354 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3356 if (GET_CODE (PATTERN (prev)) == USE
3357 || GET_CODE (PATTERN (prev)) == CLOBBER
3358 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3360 else if ((need_block &= ~1) < 0)
3362 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3365 /* We can't use JUMP_LABEL here because it might be undefined
3366 when not optimizing. */
3367 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3368 /* If the branch is out of range, try to find a scratch register for it. */
3370 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3374 /* Don't look for the stack pointer as a scratch register,
3375 it would cause trouble if an interrupt occurred. */
3376 unsigned try = 0x7fff, used;
3377 int jump_left = flag_expensive_optimizations + 1;
3379 /* It is likely that the most recent eligible instruction is wanted for
3380 the delay slot. Therefore, find out which registers it uses, and
3381 try to avoid using them. */
3383 for (scan = jump; (scan = PREV_INSN (scan)); )
3387 if (INSN_DELETED_P (scan))
3389 code = GET_CODE (scan);
3390 if (code == CODE_LABEL || code == JUMP_INSN)
3393 && GET_CODE (PATTERN (scan)) != USE
3394 && GET_CODE (PATTERN (scan)) != CLOBBER
3395 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3397 try &= ~regs_used (PATTERN (scan), 0);
/* Forward scan from the jump target: collect registers used/clobbered to
   find one that is dead at the destination (usable as scratch).  */
3401 for (used = dead = 0, scan = JUMP_LABEL (jump);
3402 (scan = NEXT_INSN (scan)); )
3406 if (INSN_DELETED_P (scan))
3408 code = GET_CODE (scan);
3409 if (GET_RTX_CLASS (code) == 'i')
3411 used |= regs_used (PATTERN (scan), 0);
3412 if (code == CALL_INSN)
3413 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3414 dead |= (used >> 16) & ~used;
3420 if (code == JUMP_INSN)
3422 if (jump_left-- && simplejump_p (scan))
3423 scan = JUMP_LABEL (scan);
3429 /* Mask out the stack pointer again, in case it was
3430 the only 'free' register we have found. */
3433 /* If the immediate destination is still in range, check for possible
3434 threading with a jump beyond the delay slot insn.
3435 Don't check if we are called recursively; the jump has been or will be
3436 checked in a different invocation then. */
3438 else if (optimize && need_block >= 0)
3440 rtx next = next_active_insn (next_active_insn (dest));
3441 if (next && GET_CODE (next) == JUMP_INSN
3442 && GET_CODE (PATTERN (next)) == SET
3443 && recog_memoized (next) == CODE_FOR_jump)
3445 dest = JUMP_LABEL (next);
3447 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3449 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* A dead register was found: materialize an indirect_jump_scratch so
   reorg keeps the scratch register alive for the far branch.  */
3455 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3457 /* It would be nice if we could convert the jump into an indirect
3458 jump / far branch right now, and thus exposing all constituent
3459 instructions to further optimization. However, reorg uses
3460 simplejump_p to determine if there is an unconditional jump where
3461 it should try to schedule instructions from the target of the
3462 branch; simplejump_p fails for indirect jumps even if they have
3464 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3465 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3467 /* ??? We would like this to have the scope of the jump, but that
3468 scope will change when a delay slot insn of an inner scope is added.
3469 Hence, after delay slot scheduling, we'll have to expect
3470 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3473 INSN_SCOPE (insn) = INSN_SCOPE (jump);
3474 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3477 else if (need_block)
3478 /* We can't use JUMP_LABEL here because it might be undefined
3479 when not optimizing. */
3480 return emit_insn_before (gen_block_branch_redirect
3481 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
/* Conditional-branch displacement limits; presumably the SH bt/bf
   reach (-252..+258 plus slack) -- TODO confirm against the SH manual. */
3486 #define CONDJUMP_MIN -252
3487 #define CONDJUMP_MAX 262
/* NOTE(review): the `struct far_branch {' opening line and the member
   declarations for near_label / insert_place / far_label / address are
   elided from this extract; only the comments and two members remain. */
3490 /* A label (to be placed) in front of the jump
3491 that jumps to our ultimate destination. */
3493 /* Where we are going to insert it if we cannot move the jump any farther,
3494 or the jump itself if we have picked up an existing jump. */
3496 /* The ultimate destination. */
3498 struct far_branch *prev;
3499 /* If the branch has already been created, its address;
3500 else the address of its first prospective user. */
3504 static void gen_far_branch PARAMS ((struct far_branch *));
/* Tracks which phase of machine_dependent_reorg is running; consulted
   by barrier_align and sh_loop_align below.  */
3505 enum mdep_reorg_phase_e mdep_reorg_phase;
/* NOTE(review): the `gen_far_branch (bp)' definition line and braces are
   elided; the K&R parameter declaration below belongs to it.  Visible
   behavior: turn an out-of-range conditional branch into an inverted
   short branch over an unconditional jump (or return) to the far label. */
3508 struct far_branch *bp;
3510 rtx insn = bp->insert_place;
3512 rtx label = gen_label_rtx ();
3514 emit_label_after (label, insn);
3517 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3518 LABEL_NUSES (bp->far_label)++;
3521 jump = emit_jump_insn_after (gen_return (), insn);
3522 /* Emit a barrier so that reorg knows that any following instructions
3523 are not reachable via a fall-through path.
3524 But don't do this when not optimizing, since we wouldn't suppress the
3525 alignment for the barrier then, and could end up with out-of-range
3526 pc-relative loads. */
3528 emit_barrier_after (jump);
3529 emit_label_after (bp->near_label, insn);
3530 JUMP_LABEL (jump) = bp->far_label;
3531 if (! invert_jump (insn, label, 1))
3534 (gen_stuff_delay_slot
3535 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3536 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3538 /* Prevent reorg from undoing our splits. */
3539 gen_block_redirect (jump, bp->address += 2, 2);
3542 /* Fix up ADDR_DIFF_VECs. */
/* NOTE(review): lossy extract -- the function header's return type,
   parameter declaration, braces and `continue'/`break' lines are elided.
   Code left byte-identical.  Visible behavior: for every ADDR_DIFF_VEC
   jump table, locate its casesi_jump_2 (braf) insn, emit the braf's
   reference label right after it, and rebase the table on that label.  */
3544 fixup_addr_diff_vecs (first)
3549 for (insn = first; insn; insn = NEXT_INSN (insn))
3551 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3553 if (GET_CODE (insn) != JUMP_INSN
3554 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3556 pat = PATTERN (insn);
3557 vec_lab = XEXP (XEXP (pat, 0), 0);
3559 /* Search the matching casesi_jump_2. */
3560 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3562 if (GET_CODE (prev) != JUMP_INSN)
3564 prevpat = PATTERN (prev);
3565 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3567 x = XVECEXP (prevpat, 0, 1);
3568 if (GET_CODE (x) != USE)
3571 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3575 /* Emit the reference label of the braf where it belongs, right after
3576 the casesi_jump_2 (i.e. braf). */
3577 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3578 emit_label_after (braf_label, prev);
3580 /* Fix up the ADDR_DIF_VEC to be relative
3581 to the reference address of the braf. */
3582 XEXP (XEXP (pat, 0), 0) = braf_label;
3586 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3587 a barrier. Return the base 2 logarithm of the desired alignment. */
/* NOTE(review): lossy extract -- braces, some `return' lines and parts of
   conditions are elided (original line numbers jump).  Code left
   byte-identical; comments only.  */
3589 barrier_align (barrier_or_label)
3590 rtx barrier_or_label;
3592 rtx next = next_real_insn (barrier_or_label), pat, prev;
3593 int slot, credit, jump_to_next = 0;
3598 pat = PATTERN (next);
3600 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3603 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3604 /* This is a barrier in front of a constant table. */
3607 prev = prev_real_insn (barrier_or_label);
3608 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3610 pat = PATTERN (prev);
3611 /* If this is a very small table, we want to keep the alignment after
3612 the table to the minimum for proper code alignment. */
3613 return ((TARGET_SMALLCODE
3614 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3615 <= (unsigned)1 << (CACHE_LOG - 2)))
3616 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3619 if (TARGET_SMALLCODE)
3622 if (! TARGET_SH2 || ! optimize)
3623 return align_jumps_log;
3625 /* When fixing up pcloads, a constant table might be inserted just before
3626 the basic block that ends with the barrier. Thus, we can't trust the
3627 instruction lengths before that. */
3628 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3630 /* Check if there is an immediately preceding branch to the insn beyond
3631 the barrier. We must weight the cost of discarding useful information
3632 from the current cache line when executing this branch and there is
3633 an alignment, against that of fetching unneeded insn in front of the
3634 branch target when there is no alignment. */
3636 /* There are two delay_slot cases to consider. One is the simple case
3637 where the preceding branch is to the insn beyond the barrier (simple
3638 delay slot filling), and the other is where the preceding branch has
3639 a delay slot that is a duplicate of the insn after the barrier
3640 (fill_eager_delay_slots) and the branch is to the insn after the insn
3641 after the barrier. */
3643 /* PREV is presumed to be the JUMP_INSN for the barrier under
3644 investigation. Skip to the insn before it. */
3645 prev = prev_real_insn (prev);
/* Walk backwards over up to one cache line's worth of insns, deciding
   whether an aligning branch target here would pay off.  */
3647 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3648 credit >= 0 && prev && GET_CODE (prev) == INSN;
3649 prev = prev_real_insn (prev))
3652 if (GET_CODE (PATTERN (prev)) == USE
3653 || GET_CODE (PATTERN (prev)) == CLOBBER)
3655 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3657 prev = XVECEXP (PATTERN (prev), 0, 1);
3658 if (INSN_UID (prev) == INSN_UID (next))
3660 /* Delay slot was filled with insn at jump target. */
3667 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3669 credit -= get_attr_length (prev);
3672 && GET_CODE (prev) == JUMP_INSN
3673 && JUMP_LABEL (prev))
3677 || next_real_insn (JUMP_LABEL (prev)) == next
3678 /* If relax_delay_slots() decides NEXT was redundant
3679 with some previous instruction, it will have
3680 redirected PREV's jump to the following insn. */
3681 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3682 /* There is no upper bound on redundant instructions
3683 that might have been skipped, but we must not put an
3684 alignment where none had been before. */
3685 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3687 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3688 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3690 rtx pat = PATTERN (prev);
3691 if (GET_CODE (pat) == PARALLEL)
3692 pat = XVECEXP (pat, 0, 0);
3693 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3699 return align_jumps_log;
3702 /* If we are inside a phony loop, almost any kind of label can turn up as the
3703 first one in the loop. Aligning a braf label causes incorrect switch
3704 destination addresses; we can detect braf labels because they are
3705 followed by a BARRIER.
3706 Applying loop alignment to small constant or switch tables is a waste
3707 of space, so we suppress this too. */
/* NOTE(review): lossy extract -- return type, braces, the initialization
   of `next' and the early `return 0;' for the suppressed cases are
   elided.  Code left byte-identical.  Skips labels, then returns 0 for
   barrier/table/consttable successors (lines elided), else the normal
   loop alignment.  */
3709 sh_loop_align (label)
3715 next = next_nonnote_insn (next);
3716 while (next && GET_CODE (next) == CODE_LABEL);
3720 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3721 || recog_memoized (next) == CODE_FOR_consttable_2)
3724 return align_loops_log;
3727 /* Exported to toplev.c.
3729 Do a final pass over the function, just before delayed branch
3733 machine_dependent_reorg (first)
3736 rtx insn, mova = NULL_RTX;
3738 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3739 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3741 /* We must split call insns before introducing `mova's. If we're
3742 optimizing, they'll have already been split. Otherwise, make
3743 sure we don't split them too late. */
3745 split_all_insns_noflow ();
3750 /* If relaxing, generate pseudo-ops to associate function calls with
3751 the symbols they call. It does no harm to not generate these
3752 pseudo-ops. However, when we can generate them, it enables to
3753 linker to potentially relax the jsr to a bsr, and eliminate the
3754 register load and, possibly, the constant pool entry. */
3756 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3759 /* Remove all REG_LABEL notes. We want to use them for our own
3760 purposes. This works because none of the remaining passes
3761 need to look at them.
3763 ??? But it may break in the future. We should use a machine
3764 dependent REG_NOTE, or some other approach entirely. */
3765 for (insn = first; insn; insn = NEXT_INSN (insn))
3771 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3772 remove_note (insn, note);
3776 for (insn = first; insn; insn = NEXT_INSN (insn))
3778 rtx pattern, reg, link, set, scan, dies, label;
3779 int rescan = 0, foundinsn = 0;
3781 if (GET_CODE (insn) == CALL_INSN)
3783 pattern = PATTERN (insn);
3785 if (GET_CODE (pattern) == PARALLEL)
3786 pattern = XVECEXP (pattern, 0, 0);
3787 if (GET_CODE (pattern) == SET)
3788 pattern = SET_SRC (pattern);
3790 if (GET_CODE (pattern) != CALL
3791 || GET_CODE (XEXP (pattern, 0)) != MEM)
3794 reg = XEXP (XEXP (pattern, 0), 0);
3798 reg = sfunc_uses_reg (insn);
3803 if (GET_CODE (reg) != REG)
3806 /* This is a function call via REG. If the only uses of REG
3807 between the time that it is set and the time that it dies
3808 are in function calls, then we can associate all the
3809 function calls with the setting of REG. */
3811 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3813 if (REG_NOTE_KIND (link) != 0)
3815 set = single_set (XEXP (link, 0));
3816 if (set && rtx_equal_p (reg, SET_DEST (set)))
3818 link = XEXP (link, 0);
3825 /* ??? Sometimes global register allocation will have
3826 deleted the insn pointed to by LOG_LINKS. Try
3827 scanning backward to find where the register is set. */
3828 for (scan = PREV_INSN (insn);
3829 scan && GET_CODE (scan) != CODE_LABEL;
3830 scan = PREV_INSN (scan))
3832 if (! INSN_P (scan))
3835 if (! reg_mentioned_p (reg, scan))
3838 if (noncall_uses_reg (reg, scan, &set))
3852 /* The register is set at LINK. */
3854 /* We can only optimize the function call if the register is
3855 being set to a symbol. In theory, we could sometimes
3856 optimize calls to a constant location, but the assembler
3857 and linker do not support that at present. */
3858 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3859 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3862 /* Scan forward from LINK to the place where REG dies, and
3863 make sure that the only insns which use REG are
3864 themselves function calls. */
3866 /* ??? This doesn't work for call targets that were allocated
3867 by reload, since there may not be a REG_DEAD note for the
3871 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3875 /* Don't try to trace forward past a CODE_LABEL if we haven't
3876 seen INSN yet. Ordinarily, we will only find the setting insn
3877 in LOG_LINKS if it is in the same basic block. However,
3878 cross-jumping can insert code labels in between the load and
3879 the call, and can result in situations where a single call
3880 insn may have two targets depending on where we came from. */
3882 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3885 if (! INSN_P (scan))
3888 /* Don't try to trace forward past a JUMP. To optimize
3889 safely, we would have to check that all the
3890 instructions at the jump destination did not use REG. */
3892 if (GET_CODE (scan) == JUMP_INSN)
3895 if (! reg_mentioned_p (reg, scan))
3898 if (noncall_uses_reg (reg, scan, &scanset))
3905 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3907 /* There is a function call to this register other
3908 than the one we are checking. If we optimize
3909 this call, we need to rescan again below. */
3913 /* ??? We shouldn't have to worry about SCANSET here.
3914 We should just be able to check for a REG_DEAD note
3915 on a function call. However, the REG_DEAD notes are
3916 apparently not dependable around libcalls; c-torture
3917 execute/920501-2 is a test case. If SCANSET is set,
3918 then this insn sets the register, so it must have
3919 died earlier. Unfortunately, this will only handle
3920 the cases in which the register is, in fact, set in a
3923 /* ??? We shouldn't have to use FOUNDINSN here.
3924 However, the LOG_LINKS fields are apparently not
3925 entirely reliable around libcalls;
3926 newlib/libm/math/e_pow.c is a test case. Sometimes
3927 an insn will appear in LOG_LINKS even though it is
3928 not the most recent insn which sets the register. */
3932 || find_reg_note (scan, REG_DEAD, reg)))
3941 /* Either there was a branch, or some insn used REG
3942 other than as a function call address. */
3946 /* Create a code label, and put it in a REG_LABEL note on
3947 the insn which sets the register, and on each call insn
3948 which uses the register. In final_prescan_insn we look
3949 for the REG_LABEL notes, and output the appropriate label
3952 label = gen_label_rtx ();
3953 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3955 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3964 scan = NEXT_INSN (scan);
3966 && ((GET_CODE (scan) == CALL_INSN
3967 && reg_mentioned_p (reg, scan))
3968 || ((reg2 = sfunc_uses_reg (scan))
3969 && REGNO (reg2) == REGNO (reg))))
3971 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3973 while (scan != dies);
3979 fixup_addr_diff_vecs (first);
3983 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3984 shorten_branches (first);
3986 /* Scan the function looking for move instructions which have to be
3987 changed to pc-relative loads and insert the literal tables. */
3989 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3990 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3997 else if (GET_CODE (insn) == JUMP_INSN
3998 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4006 /* Some code might have been inserted between the mova and
4007 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4008 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4009 total += get_attr_length (scan);
4011 /* range of mova is 1020, add 4 because pc counts from address of
4012 second instruction after this one, subtract 2 in case pc is 2
4013 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4014 cancels out with alignment effects of the mova itself. */
4017 /* Change the mova into a load, and restart scanning
4018 there. broken_move will then return true for mova. */
4019 SET_SRC (PATTERN (mova))
4020 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4021 INSN_CODE (mova) = -1;
4025 if (broken_move (insn))
4028 /* Scan ahead looking for a barrier to stick the constant table
4030 rtx barrier = find_barrier (num_mova, mova, insn);
4031 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4033 if (num_mova && ! mova_p (mova))
4035 /* find_barrier had to change the first mova into a
4036 pcload; thus, we have to start with this new pcload. */
4040 /* Now find all the moves between the points and modify them. */
4041 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4043 if (GET_CODE (scan) == CODE_LABEL)
4045 if (broken_move (scan))
4047 rtx *patp = &PATTERN (scan), pat = *patp;
4051 enum machine_mode mode;
4053 if (GET_CODE (pat) == PARALLEL)
4054 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4055 src = SET_SRC (pat);
4056 dst = SET_DEST (pat);
4057 mode = GET_MODE (dst);
4059 if (mode == SImode && hi_const (src)
4060 && REGNO (dst) != FPUL_REG)
4065 while (GET_CODE (dst) == SUBREG)
4067 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4068 GET_MODE (SUBREG_REG (dst)),
4071 dst = SUBREG_REG (dst);
4073 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4076 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4078 /* This must be an insn that clobbers r0. */
4079 rtx clobber = XVECEXP (PATTERN (scan), 0,
4080 XVECLEN (PATTERN (scan), 0) - 1);
4082 if (GET_CODE (clobber) != CLOBBER
4083 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4087 && reg_set_between_p (r0_rtx, last_float_move, scan))
4091 && GET_MODE_SIZE (mode) != 4
4092 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4094 lab = add_constant (src, mode, last_float);
4096 emit_insn_before (gen_mova (lab), scan);
4099 /* There will be a REG_UNUSED note for r0 on
4100 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4101 lest reorg:mark_target_live_regs will not
4102 consider r0 to be used, and we end up with delay
4103 slot insn in front of SCAN that clobbers r0. */
4105 = find_regno_note (last_float_move, REG_UNUSED, 0);
4107 /* If we are not optimizing, then there may not be
4110 PUT_MODE (note, REG_INC);
4112 *last_float_addr = r0_inc_rtx;
4114 last_float_move = scan;
4116 newsrc = gen_rtx (MEM, mode,
4117 (((TARGET_SH4 && ! TARGET_FMOVD)
4118 || REGNO (dst) == FPUL_REG)
4121 last_float_addr = &XEXP (newsrc, 0);
4123 /* Remove the clobber of r0. */
4124 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4125 RTX_UNCHANGING_P (newsrc) = 1;
4127 /* This is a mova needing a label. Create it. */
4128 else if (GET_CODE (src) == UNSPEC
4129 && XINT (src, 1) == UNSPEC_MOVA
4130 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4132 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4133 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4134 newsrc = gen_rtx_UNSPEC (SImode,
4135 gen_rtvec (1, newsrc),
4140 lab = add_constant (src, mode, 0);
4141 newsrc = gen_rtx_MEM (mode,
4142 gen_rtx_LABEL_REF (VOIDmode, lab));
4143 RTX_UNCHANGING_P (newsrc) = 1;
4145 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4146 INSN_CODE (scan) = -1;
4149 dump_table (barrier);
4154 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4155 INSN_ADDRESSES_FREE ();
4156 split_branches (first);
4158 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4159 also has an effect on the register that holds the address of the sfunc.
4160 Insert an extra dummy insn in front of each sfunc that pretends to
4161 use this register. */
4162 if (flag_delayed_branch)
4164 for (insn = first; insn; insn = NEXT_INSN (insn))
4166 rtx reg = sfunc_uses_reg (insn);
4170 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4174 /* fpscr is not actually a user variable, but we pretend it is for the
4175 sake of the previous optimization passes, since we want it handled like
4176 one. However, we don't have any debugging information for it, so turn
4177 it into a non-user variable now. */
4179 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4181 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the insn UID of the real branch destination reached from LABEL,
   skipping any newly created branch-redirection blocking insns whose UID
   is >= MAX_UID (those post-date the address/branch arrays).
   NOTE(review): this listing is incomplete -- the return type, parameter
   declarations, the early-exit for an undefined label, and the final
   return statements are elided; confirm against the full source.  */
4185 get_dest_uid (label, max_uid)
/* First real (non-note, non-label) insn after LABEL.  */
4189 rtx dest = next_real_insn (label);
4192 /* This can happen for an undefined label. */
4194 dest_uid = INSN_UID (dest);
4195 /* If this is a newly created branch redirection blocking instruction,
4196 we cannot index the branch_uid or insn_addresses arrays with its
4197 uid. But then, we won't need to, because the actual destination is
4198 the following branch. */
4199 while (dest_uid >= max_uid)
4201 dest = NEXT_INSN (dest);
4202 dest_uid = INSN_UID (dest);
/* A destination that is itself a RETURN jump is presumably special-cased
   by the (elided) code that follows -- TODO confirm in full source.  */
4204 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4209 /* Split condbranches that are out of range. Also add clobbers for
4210 scratch registers that are needed in far jumps.
4211 We do this before delay slot scheduling, so that it can take our
4212 newly created instructions into account. It also allows us to
4213 find branches with common targets more easily. */
/* Split conditional branches that are out of range, starting from insn
   chain FIRST, and add far-branch stubs (see the comment block above).
   Runs shorten_branches first so insn lengths/addresses are valid, then
   records per-destination state in an alloca'd uid_branch[] table of
   struct far_branch, chained through far_branch_list.
   NOTE(review): many statements and braces are elided from this listing;
   the structure below is indicative, not complete.  */
4216 split_branches (first)
4220 struct far_branch **uid_branch, *far_branch_list = 0;
4221 int max_uid = get_max_uid ();
4223 /* Find out which branches are out of range. */
4224 shorten_branches (first);
/* One slot per insn UID, zero-initialized: bp state per destination.  */
4226 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4227 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4229 for (insn = first; insn; insn = NEXT_INSN (insn))
4230 if (! INSN_P (insn))
4232 else if (INSN_DELETED_P (insn))
4234 /* Shorten_branches would split this instruction again,
4235 so transform it into a note. */
4236 PUT_CODE (insn, NOTE);
4237 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4238 NOTE_SOURCE_FILE (insn) = 0;
4240 else if (GET_CODE (insn) == JUMP_INSN
4241 /* Don't mess with ADDR_DIFF_VEC */
4242 && (GET_CODE (PATTERN (insn)) == SET
4243 || GET_CODE (PATTERN (insn)) == RETURN))
4245 enum attr_type type = get_attr_type (insn);
/* Case 1: conditional branch longer than the short form (> 4 bytes)
   must be retargeted through a nearby label / far-branch stub.  */
4246 if (type == TYPE_CBRANCH)
4250 if (get_attr_length (insn) > 4)
4252 rtx src = SET_SRC (PATTERN (insn));
4253 rtx olabel = XEXP (XEXP (src, 1), 0);
4254 int addr = INSN_ADDRESSES (INSN_UID (insn));
4256 int dest_uid = get_dest_uid (olabel, max_uid);
4257 struct far_branch *bp = uid_branch[dest_uid];
4259 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4260 the label if the LABEL_NUSES count drops to zero. There is
4261 always a jump_optimize pass that sets these values, but it
4262 proceeds to delete unreferenced code, and then if not
4263 optimizing, to un-delete the deleted instructions, thus
4264 leaving labels with too low uses counts. */
4267 JUMP_LABEL (insn) = olabel;
4268 LABEL_NUSES (olabel)++;
/* No far_branch record for this destination yet: create one.  */
4272 bp = (struct far_branch *) alloca (sizeof *bp);
4273 uid_branch[dest_uid] = bp;
4274 bp->prev = far_branch_list;
4275 far_branch_list = bp;
4277 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4278 LABEL_NUSES (bp->far_label)++;
4282 label = bp->near_label;
/* CONDJUMP_MIN/MAX bound the reach of a short conditional branch;
   decide whether an existing/new near label can serve this insn.  */
4283 if (! label && bp->address - addr >= CONDJUMP_MIN)
4285 rtx block = bp->insert_place;
4287 if (GET_CODE (PATTERN (block)) == RETURN)
4288 block = PREV_INSN (block);
4290 block = gen_block_redirect (block,
4292 label = emit_label_after (gen_label_rtx (),
4294 bp->near_label = label;
4296 else if (label && ! NEXT_INSN (label))
4298 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4299 bp->insert_place = insn;
4301 gen_far_branch (bp);
4305 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4307 bp->near_label = label = gen_label_rtx ();
4308 bp->insert_place = insn;
4311 if (! redirect_jump (insn, label, 1))
4316 /* get_attr_length (insn) == 2 */
4317 /* Check if we have a pattern where reorg wants to redirect
4318 the branch to a label from an unconditional branch that
4320 /* We can't use JUMP_LABEL here because it might be undefined
4321 when not optimizing. */
4322 /* A syntax error might cause beyond to be NULL_RTX. */
4324 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4328 && (GET_CODE (beyond) == JUMP_INSN
4329 || ((beyond = next_active_insn (beyond))
4330 && GET_CODE (beyond) == JUMP_INSN))
4331 && GET_CODE (PATTERN (beyond)) == SET
4332 && recog_memoized (beyond) == CODE_FOR_jump
4334 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4335 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4337 gen_block_redirect (beyond,
4338 INSN_ADDRESSES (INSN_UID (beyond)), 1);
/* Also guard the next active jump, for the same reorg-redirect hazard
   (the 252-byte window matches the check just above).  */
4341 next = next_active_insn (insn);
4343 if ((GET_CODE (next) == JUMP_INSN
4344 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4345 && GET_CODE (PATTERN (next)) == SET
4346 && recog_memoized (next) == CODE_FOR_jump
4348 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4349 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4351 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
/* Case 2: unconditional jumps / returns feed the far_branch records.  */
4353 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4355 int addr = INSN_ADDRESSES (INSN_UID (insn));
4358 struct far_branch *bp;
4360 if (type == TYPE_JUMP)
4362 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4363 dest_uid = get_dest_uid (far_label, max_uid);
4366 /* Parse errors can lead to labels outside
4368 if (! NEXT_INSN (far_label))
4373 JUMP_LABEL (insn) = far_label;
4374 LABEL_NUSES (far_label)++;
4376 redirect_jump (insn, NULL_RTX, 1);
4380 bp = uid_branch[dest_uid];
4383 bp = (struct far_branch *) alloca (sizeof *bp);
4384 uid_branch[dest_uid] = bp;
4385 bp->prev = far_branch_list;
4386 far_branch_list = bp;
4388 bp->far_label = far_label;
4390 LABEL_NUSES (far_label)++;
4392 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4393 if (addr - bp->address <= CONDJUMP_MAX)
4394 emit_label_after (bp->near_label, PREV_INSN (insn));
4397 gen_far_branch (bp);
4403 bp->insert_place = insn;
4405 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4407 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4410 /* Generate all pending far branches,
4411 and free our references to the far labels. */
4412 while (far_branch_list)
4414 if (far_branch_list->near_label
4415 && ! NEXT_INSN (far_branch_list->near_label))
4416 gen_far_branch (far_branch_list);
/* Drop our reference; delete the label outright once unused.  */
4418 && far_branch_list->far_label
4419 && ! --LABEL_NUSES (far_branch_list->far_label))
4420 delete_insn (far_branch_list->far_label);
4421 far_branch_list = far_branch_list->prev;
4424 /* Instruction length information is no longer valid due to the new
4425 instructions that have been generated. */
4426 init_insn_lengths ();
4429 /* Dump out instruction addresses, which is useful for debugging the
4430 constant pool table stuff.
4432 If relaxing, output the label and pseudo-ops used to link together
4433 calls and the instruction which set the registers. */
4435 /* ??? The addresses printed by this routine for insns are nonsense for
4436 insns which are inside of a sequence where none of the inner insns have
4437 variable length. This is because the second pass of shorten_branches
4438 does not bother to update them. */
/* Hook called by final before each insn is output (see the comment block
   above).  With -mdumpisize, print the insn address; when a REG_LABEL
   note is present (attached by the sfunc optimization in reorg), emit
   either a ".uses Ln" linker-relaxation directive for call/sfunc insns
   or the label itself for plain SETs.
   NOTE(review): return type, INSN's declaration, and the braces/locals
   (note, pattern) are elided from this listing.  */
4441 final_prescan_insn (insn, opvec, noperands)
4443 rtx *opvec ATTRIBUTE_UNUSED;
4444 int noperands ATTRIBUTE_UNUSED;
4446 if (TARGET_DUMPISIZE)
4447 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4453 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
/* Look inside a PARALLEL for the call/set that carries the note's
   meaning; presumably only reached when NOTE != 0 -- elided guard.  */
4458 pattern = PATTERN (insn);
4459 if (GET_CODE (pattern) == PARALLEL)
4460 pattern = XVECEXP (pattern, 0, 0);
4461 if (GET_CODE (pattern) == CALL
4462 || (GET_CODE (pattern) == SET
4463 && (GET_CODE (SET_SRC (pattern)) == CALL
4464 || get_attr_type (insn) == TYPE_SFUNC)))
4465 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4466 CODE_LABEL_NUMBER (XEXP (note, 0)));
4467 else if (GET_CODE (pattern) == SET)
4468 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4469 CODE_LABEL_NUMBER (XEXP (note, 0)));
4476 /* Dump out any constants accumulated in the final pass. These will
/* Emit any constants still accumulated in pool_vector at the end of the
   final pass: align to 4 bytes, then for each pool entry output its
   internal label followed by a .long of its value.
   NOTE(review): return type, the pool_size guard, and pool reset code
   appear to be elided from this listing.  */
4480 output_jump_label_table ()
4486 fprintf (asm_out_file, "\t.align 2\n");
4487 for (i = 0; i < pool_size; i++)
4489 pool_node *p = &pool_vector[i];
4491 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4492 CODE_LABEL_NUMBER (p->label));
4493 output_asm_insn (".long %O0", &p->value);
4501 /* A full frame looks like:
4505 [ if current_function_anonymous_args
4518 local-0 <- fp points here. */
4520 /* Number of bytes pushed for anonymous args, used to pass information
4521 between expand_prologue and expand_epilogue. */
4523 static int extra_push;
4525 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4526 to be adjusted, and TEMP, if nonnegative, holds the register number
4527 of a general register that we may clobber. */
/* Adjust REG by SIZE bytes (see comment block above): emit a single add
   when SIZE fits an immediate, two aligned partial adds when that works,
   else load the constant into scratch register TEMP and add/subtract it.
   EMIT_FN is the emitter to use (frame_insn for prologue insns that need
   frame-related notes, plain emit_insn otherwise).
   NOTE(review): the SIZE==0 early exit, several declarations, braces and
   the abort() for an invalid TEMP are elided from this listing.  */
4530 output_stack_adjust (size, reg, temp, emit_fn)
4534 rtx (*emit_fn) PARAMS ((rtx));
4538 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4543 if (CONST_OK_FOR_ADD (size))
4544 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4545 /* Try to do it with two partial adjustments; however, we must make
4546 sure that the stack is properly aligned at all times, in case
4547 an interrupt occurs between the two partial adjustments. */
4548 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4549 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4551 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4552 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4559 /* If TEMP is invalid, we could temporarily save a general
4560 register to MACL. However, there is currently no need
4561 to handle this case, so just abort when we see it. */
4564 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4566 /* If SIZE is negative, subtract the positive value.
4567 This sometimes allows a constant pool entry to be shared
4568 between prologue and epilogue code. */
4571 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4572 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4576 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4577 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
/* For frame insns, record the net effect (REG += SIZE) so the unwinder
   sees the adjustment rather than the scratch-register arithmetic.  */
4579 if (emit_fn == frame_insn)
4581 = (gen_rtx_EXPR_LIST
4582 (REG_FRAME_RELATED_EXPR,
4583 gen_rtx_SET (VOIDmode, reg,
4584 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4595 RTX_FRAME_RELATED_P (x) = 1;
4599 /* Output RTL to push register RN onto the stack. */
/* NOTE(review): body of push(rn) -- "Output RTL to push register RN onto
   the stack" per the comment above; the function header, RN's special-case
   test for FPUL_REG, braces, and the final emit are elided from this
   listing.  Selects the push pattern by register class/mode: fpul, fpscr,
   double-precision FP pairs (push_4), single FP (push_e), or SImode.  */
4607 x = gen_push_fpul ();
4608 else if (rn == FPSCR_REG)
4609 x = gen_push_fpscr ();
4610 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4611 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP reg of a DF pair: presumably skipped, since the even
   member pushes both halves -- TODO confirm against elided lines.  */
4613 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4615 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4617 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4618 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4620 x = gen_push (gen_rtx_REG (SImode, rn));
/* Attach a REG_INC note for the stack pointer auto-decrement.  */
4624 = gen_rtx_EXPR_LIST (REG_INC,
4625 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4629 /* Output RTL to pop register RN from the stack. */
/* NOTE(review): body of pop(rn) -- "Output RTL to pop register RN from
   the stack" per the comment above; mirrors push() case-for-case, with
   the same elisions (header, FPUL_REG test, braces, final emit).  */
4637 x = gen_pop_fpul ();
4638 else if (rn == FPSCR_REG)
4639 x = gen_pop_fpscr ();
4640 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4641 && FP_OR_XD_REGISTER_P (rn))
4643 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4645 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4647 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4648 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4650 x = gen_pop (gen_rtx_REG (SImode, rn));
/* Attach a REG_INC note for the stack pointer auto-increment.  */
4654 = gen_rtx_EXPR_LIST (REG_INC,
4655 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4658 /* Generate code to push the regs specified in the mask. */
/* NOTE(review): push_regs(mask) -- "Generate code to push the regs
   specified in the mask" per the comment above; the function name line,
   the loop body (the push() calls) and braces are elided.  MASK is the
   live_regs_mask bitset produced by calc_live_regs, one bit per hard
   register, packed 32 per HOST_WIDE_INT word.  */
4662 HOST_WIDE_INT *mask;
4666 /* Push PR last; this gives better latencies after the prologue, and
4667 candidates for the return delay slot when there are no general
4668 registers pushed. */
4669 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4670 if (i != PR_REG && mask[i / 32] & (1 << (i % 32)))
4672 if (mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4676 /* Work out the registers which need to be saved, both as a mask and a
4677 count of saved words.
4679 If doing a pragma interrupt function, then push all regs used by the
4680 function, and if we call another function (we can tell by looking at PR),
4681 make sure that all the regs it clobbers are safe too. */
/* Compute which registers the current function must save (see comment
   block above): fill LIVE_REGS_MASK (bitset, 32 regs per word) and store
   the total save size in bytes through COUNT_PTR.  Interrupt handlers
   save everything they may touch; otherwise only used call-saved regs,
   EH return data regs, and (for PIC/SHcompact) the PIC register.
   NOTE(review): return type, several declarations (reg, count, pr_live),
   braces, and the final *count_ptr store are elided from this listing.  */
4684 calc_live_regs (count_ptr, live_regs_mask)
4686 HOST_WIDE_INT *live_regs_mask;
4690 int interrupt_handler;
4693 interrupt_handler = sh_cfun_interrupt_handler_p ();
4695 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4696 live_regs_mask[count] = 0;
4697 /* If we can save a lot of saves by switching to double mode, do that. */
4698 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4699 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4700 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4701 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
/* Clearing FPU_SINGLE_BIT flips the FPU into double-move mode so FP
   register pairs can be saved with single fmov.d instructions.  */
4704 target_flags &= ~FPU_SINGLE_BIT;
4707 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4708 knows how to use it. That means the pseudo originally allocated for
4709 the initial value can become the PR_MEDIA_REG hard register, as seen for
4710 execute/20010122-1.c:test9. */
4712 pr_live = regs_ever_live[PR_MEDIA_REG];
/* Non-SHmedia: PR is live if its incoming value was captured into some
   other register, or if it is simply ever live.  */
4715 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4716 pr_live = (pr_initial
4717 ? (GET_CODE (pr_initial) != REG
4718 || REGNO (pr_initial) != (PR_REG))
4719 : regs_ever_live[PR_REG]);
4721 /* Force PR to be live if the prologue has to call the SHmedia
4722 argument decoder or register saver. */
4723 if (TARGET_SHCOMPACT
4724 && ((current_function_args_info.call_cookie
4725 & ~ CALL_COOKIE_RET_TRAMP (1))
4726 || current_function_has_nonlocal_label))
/* Walk hard regs from high to low, accumulating mask bits and byte
   counts.  The big conditional selects the save policy per register.  */
4728 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4730 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4732 : (interrupt_handler && ! pragma_trapa)
4733 ? (/* Need to save all the regs ever live. */
4734 (regs_ever_live[reg]
4735 || (call_used_regs[reg]
4736 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4738 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4739 && reg != RETURN_ADDRESS_POINTER_REGNUM
4740 && reg != T_REG && reg != GBR_REG
4741 /* Push fpscr only on targets which have FPU */
4742 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4743 : (/* Only push those regs which are used and need to be saved. */
4746 && current_function_args_info.call_cookie
4747 && reg == PIC_OFFSET_TABLE_REGNUM)
4748 || (regs_ever_live[reg] && ! call_used_regs[reg])
4749 || (current_function_calls_eh_return
4750 && (reg == EH_RETURN_DATA_REGNO (0)
4751 || reg == EH_RETURN_DATA_REGNO (1)
4752 || reg == EH_RETURN_DATA_REGNO (2)
4753 || reg == EH_RETURN_DATA_REGNO (3)))))
4755 live_regs_mask[reg / 32] |= 1 << (reg % 32);
4756 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
/* Float regs on SH4/SH5 with -mfmovd: keep DF pairs complete, since
   saves are done in pairs.  */
4758 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4759 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4761 if (FP_REGISTER_P (reg))
4763 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4765 live_regs_mask[(reg ^ 1) / 32] |= 1 << ((reg ^ 1) % 32);
4766 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4769 else if (XD_REGISTER_P (reg))
4771 /* Must switch to double mode to access these registers. */
4772 target_flags &= ~FPU_SINGLE_BIT;
4781 /* Code to generate prologue and epilogue sequences */
4783 /* PUSHED is the number of bytes that are being pushed on the
4784 stack for register saves. Return the frame size, padded
4785 appropriately so that the stack stays properly aligned. */
/* Return the local frame size rounded so that frame + PUSHED (bytes of
   register saves already on the stack) is a multiple of STACK_BOUNDARY.
   See the comment block above.  */
4786 static HOST_WIDE_INT
4787 rounded_frame_size (pushed)
4790 HOST_WIDE_INT size = get_frame_size ();
4791 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* Round (size + pushed) up to ALIGN, then remove PUSHED again so the
   caller adjusts by the frame portion only.  */
4793 return ((size + pushed + align - 1) & -align) - pushed;
4796 /* Choose a call-clobbered target-branch register that remains
4797 unchanged along the whole function. We set it up as the return
4798 value in the prologue. */
/* Pick a call-clobbered target-branch (TR) register that is unused in
   this function, to hold the return address (see comment block above).
   Only valid for leaf functions; tr0 is skipped when PIC needs it.
   NOTE(review): the return type, local declarations, the failure return
   and the success return inside the loop are elided from this listing.  */
4800 sh_media_register_for_return ()
4805 if (! current_function_is_leaf)
4808 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4810 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4811 if (call_used_regs[regno] && ! regs_ever_live[regno])
/* Expand the function prologue: adjust the stack for pretend args, emit
   SHcompact/SHmedia call-cookie handling, varargs register pushes, the
   register saves computed by calc_live_regs (SH5 uses the two-pass
   aligned/unaligned save loop below; other targets use push_regs), PIC
   register setup, and the final frame allocation / frame pointer copy.
   NOTE(review): many statements, declarations and braces are elided from
   this listing; treat the structure as indicative.  */
4818 sh_expand_prologue ()
4820 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4823 int save_flags = target_flags;
4825 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4827 /* We have pretend args if we had an object sent partially in registers
4828 and partially on the stack, e.g. a large structure. */
4829 output_stack_adjust (-current_function_pretend_args_size
4830 - current_function_args_info.stack_regs * 8,
4831 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4835 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4836 /* We're going to use the PIC register to load the address of the
4837 incoming-argument decoder and/or of the return trampoline from
4838 the GOT, so make sure the PIC register is preserved and
4840 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4842 if (TARGET_SHCOMPACT
4843 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4847 /* First, make all registers with incoming arguments that will
4848 be pushed onto the stack live, so that register renaming
4849 doesn't overwrite them. */
4850 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4851 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4852 >= NPARM_REGS (SImode) - reg)
4853 for (; reg < NPARM_REGS (SImode); reg++)
4854 emit_insn (gen_shcompact_preserve_incoming_args
4855 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4856 else if (CALL_COOKIE_INT_REG_GET
4857 (current_function_args_info.call_cookie, reg) == 1)
4858 emit_insn (gen_shcompact_preserve_incoming_args
4859 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
/* Pass decoder address (macl), cookie (r0) and a copy in mach, as the
   SHcompact argument-decoder convention requires.  */
4861 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4863 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4864 GEN_INT (current_function_args_info.call_cookie));
4865 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4866 gen_rtx_REG (SImode, R0_REG));
4868 else if (TARGET_SHMEDIA)
4870 int tr = sh_media_register_for_return ();
4874 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4875 gen_rtx_REG (DImode, PR_MEDIA_REG));
4877 /* If this function only exits with sibcalls, this copy
4878 will be flagged as dead. */
4879 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4885 /* Emit the code for SETUP_VARARGS. */
4886 if (current_function_stdarg)
4888 /* This is not used by the SH2E calling convention */
4889 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4891 /* Push arg regs as if they'd been provided by caller in stack. */
4892 for (i = 0; i < NPARM_REGS(SImode); i++)
4894 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4897 if (i >= (NPARM_REGS(SImode)
4898 - current_function_args_info.arg_count[(int) SH_ARG_INT]
/* These pushes are varargs spills, not frame saves, so suppress the
   frame-related marking.  */
4902 RTX_FRAME_RELATED_P (insn) = 0;
4908 /* If we're supposed to switch stacks at function entry, do so now. */
4910 emit_insn (gen_sp_switch_1 ());
4912 calc_live_regs (&d, live_regs_mask);
4913 /* ??? Maybe we could save some switching if we can move a mode switch
4914 that already happens to be at the function start into the prologue. */
4915 if (target_flags != save_flags)
4916 emit_insn (gen_toggle_sz ());
/* SH5 register-save path (the elided guard presumably tests TARGET_SH5
   -- see the matching loop in sh_expand_epilogue).  */
4923 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4924 int offset_in_r0 = -1;
4927 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4928 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4929 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4931 offset = d + d_rounding;
4932 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4934 /* We loop twice: first, we save 8-byte aligned registers in the
4935 higher addresses, that are known to be aligned. Then, we
4936 proceed to saving 32-bit registers that don't need 8-byte
4938 /* Note that if you change this code in a way that affects where
4939 the return register is saved, you have to update not only
4940 sh_expand_epilogue, but also sh_set_return_address. */
4941 for (align = 1; align >= 0; align--)
4942 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4943 if (live_regs_mask[i/32] & (1 << (i % 32)))
4945 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4947 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
/* Odd half of a DF pair already covered by the even member: the
   (elided) body presumably skips or pairs it -- TODO confirm.  */
4949 if (mode == SFmode && (i % 2) == 1
4950 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4951 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4958 /* If we're doing the aligned pass and this is not aligned,
4959 or we're doing the unaligned pass and this is aligned,
4961 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4965 offset -= GET_MODE_SIZE (mode);
4967 reg_rtx = gen_rtx_REG (mode, reg);
4969 mem_rtx = gen_rtx_MEM (mode,
4970 gen_rtx_PLUS (Pmode,
/* Falls through to NULL-ing mem_rtx if sp+offset isn't a valid
   address for this mode; then try an r0 pre-decrement form.  */
4974 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4980 if (HAVE_PRE_DECREMENT
4981 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4982 || mem_rtx == NULL_RTX
4983 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4985 pre_dec = gen_rtx_MEM (mode,
4986 gen_rtx_PRE_DEC (Pmode, r0));
4988 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
4997 offset += GET_MODE_SIZE (mode);
5001 if (mem_rtx != NULL_RTX)
/* Materialize or update the save-area pointer in r0.  */
5004 if (offset_in_r0 == -1)
5006 emit_move_insn (r0, GEN_INT (offset));
5007 offset_in_r0 = offset;
5009 else if (offset != offset_in_r0)
5014 GEN_INT (offset - offset_in_r0)));
5015 offset_in_r0 += offset - offset_in_r0;
5018 if (pre_dec != NULL_RTX)
5024 (Pmode, r0, stack_pointer_rtx));
5028 offset -= GET_MODE_SIZE (mode);
5029 offset_in_r0 -= GET_MODE_SIZE (mode);
5034 mem_rtx = gen_rtx_MEM (mode, r0);
5036 mem_rtx = gen_rtx_MEM (mode,
5037 gen_rtx_PLUS (Pmode,
5041 /* We must not use an r0-based address for target-branch
5042 registers or for special registers without pre-dec
5043 memory addresses, since we store their values in r0
5045 if (TARGET_REGISTER_P (i)
5046 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5047 && mem_rtx != pre_dec))
5051 if (TARGET_REGISTER_P (i)
5052 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5053 && mem_rtx != pre_dec))
/* Bounce the value through r0, since these registers cannot be
   stored to memory directly on this path.  */
5055 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5057 emit_move_insn (r0mode, reg_rtx);
5065 emit_move_insn (mem_rtx, reg_rtx);
/* Sanity check: after saving everything, OFFSET must be back at the
   rounding slack (the elided statement is presumably an abort).  */
5068 if (offset != d_rounding)
/* Non-SH5 path: plain pushes.  */
5072 push_regs (live_regs_mask);
5074 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5076 rtx insn = get_last_insn ();
5077 rtx last = emit_insn (gen_GOTaddr2picreg ());
5079 /* Mark these insns as possibly dead. Sometimes, flow2 may
5080 delete all uses of the PIC register. In this case, let it
5081 delete the initialization too. */
5084 insn = NEXT_INSN (insn);
5086 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5090 while (insn != last);
5093 if (SHMEDIA_REGS_STACK_ADJUST ())
5095 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5096 gen_rtx_SYMBOL_REF (Pmode,
5098 ? "__GCC_push_shmedia_regs"
5099 : "__GCC_push_shmedia_regs_nofpu"));
5100 /* This must NOT go through the PLT, otherwise mach and macl
5101 may be clobbered. */
5102 emit_insn (gen_shmedia_save_restore_regs_compact
5103 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5106 if (target_flags != save_flags)
5108 rtx insn = emit_insn (gen_toggle_sz ());
5110 /* If we're lucky, a mode switch in the function body will
5111 overwrite fpscr, turning this insn dead. Tell flow this
5112 insn is ok to delete. */
5113 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5118 target_flags = save_flags;
5120 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5121 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5123 if (frame_pointer_needed)
5124 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5126 if (TARGET_SHCOMPACT
5127 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5129 /* This must NOT go through the PLT, otherwise mach and macl
5130 may be clobbered. */
5131 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5132 gen_rtx_SYMBOL_REF (Pmode,
5133 "__GCC_shcompact_incoming_args"));
5134 emit_insn (gen_shcompact_incoming_args ());
/* Expand the function epilogue: deallocate the local frame (via frame
   pointer or stack pointer), restore SHmedia clobbered regs, then restore
   the saved registers (SH5 uses the two-pass loop mirroring the prologue;
   other targets pop in reverse order with PR first), undo the pretend-arg
   adjustment, apply the EH return stack adjustment, switch stacks back if
   needed, and keep the PR pop alive for flow analysis.
   NOTE(review): many statements, declarations and braces are elided from
   this listing; treat the structure as indicative.  */
5139 sh_expand_epilogue ()
5141 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5145 int save_flags = target_flags;
5148 calc_live_regs (&d, live_regs_mask);
5150 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5151 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5152 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5154 frame_size = rounded_frame_size (d) - d_rounding;
5156 if (frame_pointer_needed)
5158 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5160 /* We must avoid moving the stack pointer adjustment past code
5161 which reads from the local frame, else an interrupt could
5162 occur after the SP adjustment and clobber data in the local
5164 emit_insn (gen_blockage ());
5165 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5167 else if (frame_size)
5169 /* We must avoid moving the stack pointer adjustment past code
5170 which reads from the local frame, else an interrupt could
5171 occur after the SP adjustment and clobber data in the local
5173 emit_insn (gen_blockage ());
5174 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5177 if (SHMEDIA_REGS_STACK_ADJUST ())
5179 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5180 gen_rtx_SYMBOL_REF (Pmode,
5182 ? "__GCC_pop_shmedia_regs"
5183 : "__GCC_pop_shmedia_regs_nofpu"));
5184 /* This must NOT go through the PLT, otherwise mach and macl
5185 may be clobbered. */
5186 emit_insn (gen_shmedia_save_restore_regs_compact
5187 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5190 /* Pop all the registers. */
5192 if (target_flags != save_flags)
5193 emit_insn (gen_toggle_sz ());
/* SH5 restore path: mirrors the prologue's save loop in reverse,
   walking offsets upward from the rounding slack.  */
5196 int offset = d_rounding;
5197 int offset_in_r0 = -1;
5200 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5202 /* We loop twice: first, we save 8-byte aligned registers in the
5203 higher addresses, that are known to be aligned. Then, we
5204 proceed to saving 32-bit registers that don't need 8-byte
5206 for (align = 0; align <= 1; align++)
5207 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5208 if (live_regs_mask[i/32] & (1 << (i % 32)))
5210 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5212 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5214 if (mode == SFmode && (i % 2) == 0
5215 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5216 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
5222 /* If we're doing the aligned pass and this is not aligned,
5223 or we're doing the unaligned pass and this is aligned,
5225 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5229 reg_rtx = gen_rtx_REG (mode, reg);
5231 mem_rtx = gen_rtx_MEM (mode,
5232 gen_rtx_PLUS (Pmode,
/* NULL mem_rtx if sp+offset isn't legitimate for this mode; then
   try an r0 post-increment load instead.  */
5236 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5242 if (HAVE_POST_INCREMENT
5243 && (offset == offset_in_r0
5244 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5245 && mem_rtx == NULL_RTX)
5246 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5248 post_inc = gen_rtx_MEM (mode,
5249 gen_rtx_POST_INC (Pmode, r0));
5251 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5254 post_inc = NULL_RTX;
5263 if (mem_rtx != NULL_RTX)
/* Materialize or update the restore-area pointer in r0.  */
5266 if (offset_in_r0 == -1)
5268 emit_move_insn (r0, GEN_INT (offset));
5269 offset_in_r0 = offset;
5271 else if (offset != offset_in_r0)
5276 GEN_INT (offset - offset_in_r0)));
5277 offset_in_r0 += offset - offset_in_r0;
5280 if (post_inc != NULL_RTX)
5286 (Pmode, r0, stack_pointer_rtx));
5292 offset_in_r0 += GET_MODE_SIZE (mode);
5295 mem_rtx = gen_rtx_MEM (mode, r0);
5297 mem_rtx = gen_rtx_MEM (mode,
5298 gen_rtx_PLUS (Pmode,
5302 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5303 && mem_rtx != post_inc)
5307 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5308 && mem_rtx != post_inc)
/* Special registers load via r0; target-branch registers via r1
   (r0 holds the restore-area pointer on this path).  */
5310 insn = emit_move_insn (r0, mem_rtx);
5313 else if (TARGET_REGISTER_P (i))
5315 rtx r1 = gen_rtx_REG (mode, R1_REG);
5317 insn = emit_move_insn (r1, mem_rtx);
5321 insn = emit_move_insn (reg_rtx, mem_rtx);
5323 offset += GET_MODE_SIZE (mode);
/* Sanity check: all saved bytes must have been consumed (the elided
   statement is presumably an abort).  */
5326 if (offset != d + d_rounding)
/* Non-SH5 path: pop PR first, then the rest in reverse order of the
   prologue's pushes.  */
5333 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5337 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5339 if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
5343 if (target_flags != save_flags)
5344 emit_insn (gen_toggle_sz ());
5345 target_flags = save_flags;
5347 output_stack_adjust (extra_push + current_function_pretend_args_size
5349 + current_function_args_info.stack_regs * 8,
5350 stack_pointer_rtx, 7, emit_insn);
5352 if (current_function_calls_eh_return)
5353 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5354 EH_RETURN_STACKADJ_RTX));
5356 /* Switch back to the normal stack if necessary. */
5358 emit_insn (gen_sp_switch_2 ());
5360 /* Tell flow the insn that pops PR isn't dead. */
5361 /* PR_REG will never be live in SHmedia mode, and we don't need to
5362 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5363 by the return pattern. */
5364 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5365 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5368 static int sh_need_epilogue_known = 0;
/* Tri-state cache for "does this function need an epilogue?":
   0 = not yet computed, 1 = epilogue needed, -1 = known empty.
   NOTE(review): this listing omits interior lines; the enclosing
   function header (presumably sh_need_epilogue) is not visible here.  */
5373 if (! sh_need_epilogue_known)
/* Generate the epilogue once to see whether it produces any insns.
   NOTE(review): the sequence setup/teardown lines are elided from
   this excerpt -- confirm against the full file.  */
5378 sh_expand_epilogue ();
5379 epilogue = get_insns ();
5381 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5383 return sh_need_epilogue_known > 0;
5386 /* Emit code to change the current function's return address to RA.
5387 TEMP is available as a scratch register, if needed. */
/* NOTE(review): this excerpt omits interior lines (declarations,
   braces, and several statements); comments below describe only what
   the visible lines establish.  */
5390 sh_set_return_address (ra, tmp)
5393 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5396 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5399 calc_live_regs (&d, live_regs_mask);
5401 /* If pr_reg isn't life, we can set it (or the register given in
5402 sh_media_register_for_return) directly. */
5403 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5409 int rr_regno = sh_media_register_for_return ();
5414 rr = gen_rtx_REG (DImode, rr_regno);
5417 rr = gen_rtx_REG (SImode, pr_reg);
5419 emit_insn (GEN_MOV (rr, ra));
5420 /* Tell flow the register for return isn't dead. */
5421 emit_insn (gen_rtx_USE (VOIDmode, rr));
/* Otherwise PR was saved on the stack: round the register-save size
   to the stack boundary, then rescan the live-register set the same
   way the prologue's save loop did to locate PR's slot offset.  */
5431 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5432 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5433 - d % (STACK_BOUNDARY / BITS_PER_UNIT))
5437 /* We loop twice: first, we save 8-byte aligned registers in the
5438 higher addresses, that are known to be aligned. Then, we
5439 proceed to saving 32-bit registers that don't need 8-byte
5441 for (align = 0; align <= 1; align++)
5442 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5443 if (live_regs_mask[i/32] & (1 << (i % 32)))
5445 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
/* An SFmode pair that will be saved as one DFmode slot -- skip the
   even half here; the odd half accounts for the pair.  */
5447 if (mode == SFmode && (i % 2) == 0
5448 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5449 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
5455 /* If we're doing the aligned pass and this is not aligned,
5456 or we're doing the unaligned pass and this is aligned,
5458 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5465 offset += GET_MODE_SIZE (mode);
5468 /* We can't find pr register. */
5472 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5473 + SHMEDIA_REGS_STACK_ADJUST ());
5476 pr_offset = rounded_frame_size (d) - d_rounding;
/* Store RA into the located PR save slot through the scratch TMP.  */
5478 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5479 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5481 tmp = gen_rtx_MEM (Pmode, tmp);
5482 emit_insn (GEN_MOV (tmp, ra));
5485 /* Clear variables at function end. */
5488 sh_output_function_epilogue (file, size)
5489 FILE *file ATTRIBUTE_UNUSED;
5490 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
/* Reset per-function #pragma and attribute state so that it cannot
   leak into the next function compiled in this translation unit.  */
5492 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5493 sh_need_epilogue_known = 0;
5494 sp_switch = NULL_RTX;
5498 sh_builtin_saveregs ()
5500 /* First unnamed integer register. */
5501 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5502 /* Number of integer registers we need to save. */
5503 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5504 /* First unnamed SFmode float reg */
5505 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5506 /* Number of SFmode float regs to save. */
5507 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5510 HOST_WIDE_INT alias_set;
5516 int pushregs = n_intregs;
5518 while (pushregs < NPARM_REGS (SImode) - 1
5519 && (CALL_COOKIE_INT_REG_GET
5520 (current_function_args_info.call_cookie,
5521 NPARM_REGS (SImode) - pushregs)
5524 current_function_args_info.call_cookie
5525 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5530 if (pushregs == NPARM_REGS (SImode))
5531 current_function_args_info.call_cookie
5532 |= (CALL_COOKIE_INT_REG (0, 1)
5533 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5535 current_function_args_info.call_cookie
5536 |= CALL_COOKIE_STACKSEQ (pushregs);
5538 current_function_pretend_args_size += 8 * n_intregs;
5540 if (TARGET_SHCOMPACT)
5544 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5546 error ("__builtin_saveregs not supported by this subtarget");
5553 /* Allocate block of memory for the regs. */
5554 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5555 Or can assign_stack_local accept a 0 SIZE argument? */
5556 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5559 regbuf = gen_rtx_MEM (BLKmode,
5560 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5561 else if (n_floatregs & 1)
5565 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5566 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5567 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5568 regbuf = change_address (regbuf, BLKmode, addr);
5571 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5572 alias_set = get_varargs_alias_set ();
5573 set_mem_alias_set (regbuf, alias_set);
5576 This is optimized to only save the regs that are necessary. Explicitly
5577 named args need not be saved. */
5579 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5580 adjust_address (regbuf, BLKmode,
5581 n_floatregs * UNITS_PER_WORD),
5582 n_intregs, n_intregs * UNITS_PER_WORD);
5585 /* Return the address of the regbuf. */
5586 return XEXP (regbuf, 0);
5589 This is optimized to only save the regs that are necessary. Explicitly
5590 named args need not be saved.
5591 We explicitly build a pointer to the buffer because it halves the insn
5592 count when not optimizing (otherwise the pointer is built for each reg
5594 We emit the moves in reverse order so that we can use predecrement. */
5596 fpregs = gen_reg_rtx (Pmode);
5597 emit_move_insn (fpregs, XEXP (regbuf, 0));
5598 emit_insn (gen_addsi3 (fpregs, fpregs,
5599 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5603 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5605 emit_insn (gen_addsi3 (fpregs, fpregs,
5606 GEN_INT (-2 * UNITS_PER_WORD)));
5607 mem = gen_rtx_MEM (DFmode, fpregs);
5608 set_mem_alias_set (mem, alias_set);
5609 emit_move_insn (mem,
5610 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5612 regno = first_floatreg;
5615 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5616 mem = gen_rtx_MEM (SFmode, fpregs);
5617 set_mem_alias_set (mem, alias_set);
5618 emit_move_insn (mem,
5619 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5620 - (TARGET_LITTLE_ENDIAN != 0)));
5624 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5628 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5629 mem = gen_rtx_MEM (SFmode, fpregs);
5630 set_mem_alias_set (mem, alias_set);
5631 emit_move_insn (mem,
5632 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5635 /* Return the address of the regbuf. */
5636 return XEXP (regbuf, 0);
5639 /* Define the `__builtin_va_list' type for the ABI. */
5644 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
/* Targets without the SH2E/SH4 split int/float argument registers
   (and Hitachi-compatible mode) use a plain pointer for va_list.  */
5647 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5648 return ptr_type_node;
/* Otherwise build a 5-field record tracking the next overflow (int),
   float, and stack argument positions plus their limits.
   NOTE(review): the field type arguments to build_decl are on elided
   lines -- presumably ptr_type_node; confirm against the full file.  */
5650 record = make_node (RECORD_TYPE);
5652 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5654 f_next_o_limit = build_decl (FIELD_DECL,
5655 get_identifier ("__va_next_o_limit"),
5657 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5659 f_next_fp_limit = build_decl (FIELD_DECL,
5660 get_identifier ("__va_next_fp_limit"),
5662 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5665 DECL_FIELD_CONTEXT (f_next_o) = record;
5666 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5667 DECL_FIELD_CONTEXT (f_next_fp) = record;
5668 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5669 DECL_FIELD_CONTEXT (f_next_stack) = record;
/* Field order matters: sh_va_start and sh_va_arg walk TYPE_FIELDS /
   TREE_CHAIN in exactly this sequence (see those functions).  */
5671 TYPE_FIELDS (record) = f_next_o;
5672 TREE_CHAIN (f_next_o) = f_next_o_limit;
5673 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5674 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5675 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5677 layout_type (record);
5682 /* Implement `va_start' for varargs and stdarg. */
5685 sh_va_start (valist, nextarg)
5689 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5690 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5696 expand_builtin_saveregs ();
5697 std_expand_builtin_va_start (valist, nextarg);
5701 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5703 std_expand_builtin_va_start (valist, nextarg);
5707 f_next_o = TYPE_FIELDS (va_list_type_node);
5708 f_next_o_limit = TREE_CHAIN (f_next_o);
5709 f_next_fp = TREE_CHAIN (f_next_o_limit);
5710 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5711 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5713 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5714 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5715 valist, f_next_o_limit);
5716 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5717 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5718 valist, f_next_fp_limit);
5719 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5720 valist, f_next_stack);
5722 /* Call __builtin_saveregs. */
5723 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5724 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5725 TREE_SIDE_EFFECTS (t) = 1;
5726 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5728 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5733 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5734 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5735 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5736 TREE_SIDE_EFFECTS (t) = 1;
5737 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5739 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5740 TREE_SIDE_EFFECTS (t) = 1;
5741 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5743 nint = current_function_args_info.arg_count[SH_ARG_INT];
5748 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5749 build_int_2 (UNITS_PER_WORD * nint, 0)));
5750 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5751 TREE_SIDE_EFFECTS (t) = 1;
5752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5754 u = make_tree (ptr_type_node, nextarg);
5755 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5756 TREE_SIDE_EFFECTS (t) = 1;
5757 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5760 /* Implement `va_arg'. */
5763 sh_va_arg (valist, type)
5766 HOST_WIDE_INT size, rsize;
5767 tree tmp, pptr_type_node;
5770 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5772 size = int_size_in_bytes (type);
5773 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5774 pptr_type_node = build_pointer_type (ptr_type_node);
5777 type = build_pointer_type (type);
5779 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5781 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5782 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5784 rtx lab_false, lab_over;
5786 f_next_o = TYPE_FIELDS (va_list_type_node);
5787 f_next_o_limit = TREE_CHAIN (f_next_o);
5788 f_next_fp = TREE_CHAIN (f_next_o_limit);
5789 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5790 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5792 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5793 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5794 valist, f_next_o_limit);
5795 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5797 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5798 valist, f_next_fp_limit);
5799 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5800 valist, f_next_stack);
5804 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5805 || (TREE_CODE (type) == COMPLEX_TYPE
5806 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5811 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5814 addr_rtx = gen_reg_rtx (Pmode);
5815 lab_false = gen_label_rtx ();
5816 lab_over = gen_label_rtx ();
5821 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5822 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5824 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5826 expand_expr (next_fp_limit, NULL_RTX,
5827 Pmode, EXPAND_NORMAL),
5828 GE, const1_rtx, Pmode, 1, lab_false);
5830 if (TYPE_ALIGN (type) > BITS_PER_WORD
5831 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5832 && (n_floatregs & 1)))
5834 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5835 build_int_2 (UNITS_PER_WORD, 0));
5836 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5837 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5838 TREE_SIDE_EFFECTS (tmp) = 1;
5839 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5842 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5843 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5845 emit_move_insn (addr_rtx, r);
5847 emit_jump_insn (gen_jump (lab_over));
5849 emit_label (lab_false);
5851 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5852 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5854 emit_move_insn (addr_rtx, r);
5858 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5859 build_int_2 (rsize, 0));
5861 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5863 expand_expr (next_o_limit, NULL_RTX,
5864 Pmode, EXPAND_NORMAL),
5865 GT, const1_rtx, Pmode, 1, lab_false);
5867 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5868 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5870 emit_move_insn (addr_rtx, r);
5872 emit_jump_insn (gen_jump (lab_over));
5874 emit_label (lab_false);
5876 if (size > 4 && ! TARGET_SH4)
5878 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5879 TREE_SIDE_EFFECTS (tmp) = 1;
5880 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5883 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5884 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5886 emit_move_insn (addr_rtx, r);
5889 emit_label (lab_over);
5891 tmp = make_tree (pptr_type_node, addr_rtx);
5892 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5895 /* ??? In va-sh.h, there had been code to make values larger than
5896 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5898 result = std_expand_builtin_va_arg (valist, type);
5901 #ifdef POINTERS_EXTEND_UNSIGNED
5902 if (GET_MODE (addr) != Pmode)
5903 addr = convert_memory_address (Pmode, result);
5905 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5906 set_mem_alias_set (result, get_varargs_alias_set ());
5908 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5909 argument to the varargs alias set. */
5913 /* Define the offset between two registers, one to be eliminated, and
5914 the other its replacement, at the start of a routine. */
/* NOTE(review): interior lines are elided throughout this function in
   this excerpt (parameter declarations, braces, switch bodies).  */
5917 initial_elimination_offset (from, to)
5922 int regs_saved_rounding = 0;
5923 int total_saved_regs_space;
5924 int total_auto_space;
5925 int save_flags = target_flags;
5928 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
/* NOTE(review): "®s_saved" below looks like "&regs_saved" mangled by
   an HTML-entity conversion ("&reg" -> the registered-sign character);
   restore from the pristine source, do not hand-edit.  */
5929 calc_live_regs (®s_saved, live_regs_mask);
5930 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
/* SH5 rounds the register-save area up to the stack boundary.  */
5931 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5932 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5933 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5935 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5936 copy_flags = target_flags;
5937 target_flags = save_flags;
5939 total_saved_regs_space = regs_saved + regs_saved_rounding;
5941 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5942 return total_saved_regs_space + total_auto_space
5943 + current_function_args_info.byref_regs * 8;
5945 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5946 return total_saved_regs_space + total_auto_space
5947 + current_function_args_info.byref_regs * 8;
5949 /* Initial gap between fp and sp is 0. */
5950 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
/* Eliminating the return-address pointer: locate PR's save slot by
   rescanning the live-register layout, mirroring the prologue.  */
5953 if (from == RETURN_ADDRESS_POINTER_REGNUM
5954 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5958 int i, n = total_saved_regs_space;
5960 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5962 n += total_auto_space;
5964 /* If it wasn't saved, there's not much we can do. */
5965 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5968 target_flags = copy_flags;
5970 /* We loop twice: first, check 8-byte aligned registers,
5971 that are stored in the higher addresses, that are known
5972 to be aligned. Then, check 32-bit registers that don't
5973 need 8-byte alignment. */
5974 for (align = 1; align >= 0; align--)
5975 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5976 if (live_regs_mask[i/32] & (1 << (i % 32)))
5978 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
/* Odd half of an SFmode pair saved as DFmode -- skip it here.  */
5980 if (mode == SFmode && (i % 2) == 1
5981 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5982 && (live_regs_mask[(i ^ 1) / 32]
5983 & (1 << ((i ^ 1) % 32))))
5989 /* If we're doing the aligned pass and this is not aligned,
5990 or we're doing the unaligned pass and this is aligned,
5992 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5996 n -= GET_MODE_SIZE (mode);
6000 target_flags = save_flags;
6008 return total_auto_space;
6014 /* Handle machine specific pragmas to be semi-compatible with Hitachi
/* Each handler just latches a file-scope flag; the flags are consumed
   when the next function is compiled and cleared in
   sh_output_function_epilogue.  */
6018 sh_pr_interrupt (pfile)
6019 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6021 pragma_interrupt = 1;
/* NOTE(review): the header line of this second handler (presumably
   sh_pr_trapa) is elided from this excerpt.  */
6026 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6028 pragma_interrupt = pragma_trapa = 1;
6032 sh_pr_nosave_low_regs (pfile)
6033 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6035 pragma_nosave_low_regs = 1;
6038 /* Generate 'handle_interrupt' attribute for decls */
6041 sh_insert_attributes (node, attributes)
/* Only act when #pragma interrupt is pending and NODE is a function.  */
6045 if (! pragma_interrupt
6046 || TREE_CODE (node) != FUNCTION_DECL)
6049 /* We are only interested in fields. */
/* NOTE(review): the comment above looks stale -- 'd' is the tree-code
   class for declarations, not fields; verify against the full file.  */
6050 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6053 /* Add a 'handle_interrupt' attribute. */
6054 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6059 /* Supported attributes:
6061 interrupt_handler -- specifies this function is an interrupt handler.
6063 sp_switch -- specifies an alternate stack for an interrupt handler
6066 trap_exit -- use a trapa to exit an interrupt function instead of
6067 an rte instruction. */
/* Attribute table consumed via TARGET_ATTRIBUTE_TABLE; terminated by
   the NULL sentinel entry.  */
6069 const struct attribute_spec sh_attribute_table[] =
6071 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6072 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6073 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6074 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6075 { NULL, 0, 0, false, false, false, NULL }
6078 /* Handle an "interrupt_handler" attribute; arguments as in
6079 struct attribute_spec.handler. */
6081 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6084 tree args ATTRIBUTE_UNUSED;
6085 int flags ATTRIBUTE_UNUSED;
/* Reject the attribute (warn, set *no_add_attrs) on non-functions.  */
6088 if (TREE_CODE (*node) != FUNCTION_DECL)
6090 warning ("`%s' attribute only applies to functions",
6091 IDENTIFIER_POINTER (name));
6092 *no_add_attrs = true;
/* SHcompact cannot support interrupt handlers -- hard error.  */
6094 else if (TARGET_SHCOMPACT)
6096 error ("attribute interrupt_handler is not compatible with -m5-compact");
6097 *no_add_attrs = true;
6103 /* Handle an "sp_switch" attribute; arguments as in
6104 struct attribute_spec.handler. */
6106 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6110 int flags ATTRIBUTE_UNUSED;
6113 if (TREE_CODE (*node) != FUNCTION_DECL)
6115 warning ("`%s' attribute only applies to functions",
6116 IDENTIFIER_POINTER (name));
6117 *no_add_attrs = true;
6119 else if (!pragma_interrupt)
6121 /* The sp_switch attribute only has meaning for interrupt functions. */
6122 warning ("`%s' attribute only applies to interrupt functions",
6123 IDENTIFIER_POINTER (name));
6124 *no_add_attrs = true;
6126 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6128 /* The argument must be a constant string. */
6129 warning ("`%s' attribute argument not a string constant",
6130 IDENTIFIER_POINTER (name));
6131 *no_add_attrs = true;
/* Valid: remember the alternate-stack symbol for prologue/epilogue
   generation (file-scope sp_switch, cleared per function).  */
6135 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6136 TREE_STRING_POINTER (TREE_VALUE (args)));
6142 /* Handle an "trap_exit" attribute; arguments as in
6143 struct attribute_spec.handler. */
6145 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6149 int flags ATTRIBUTE_UNUSED;
6152 if (TREE_CODE (*node) != FUNCTION_DECL)
6154 warning ("`%s' attribute only applies to functions",
6155 IDENTIFIER_POINTER (name));
6156 *no_add_attrs = true;
6158 else if (!pragma_interrupt)
6160 /* The trap_exit attribute only has meaning for interrupt functions. */
6161 warning ("`%s' attribute only applies to interrupt functions",
6162 IDENTIFIER_POINTER (name));
6163 *no_add_attrs = true;
6165 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6167 /* The argument must be a constant integer. */
6168 warning ("`%s' attribute argument not an integer constant",
6169 IDENTIFIER_POINTER (name));
6170 *no_add_attrs = true;
/* Valid: record the trapa vector number for epilogue generation.  */
6174 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
/* Nonzero if the current function carries the interrupt_handler
   attribute.  NOTE(review): the trailing comparison (presumably
   "!= NULL_TREE") is on an elided line.  */
6181 sh_cfun_interrupt_handler_p ()
6183 return (lookup_attribute ("interrupt_handler",
6184 DECL_ATTRIBUTES (current_function_decl))
6188 /* Predicates used by the templates. */
6190 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6191 Used only in general_movsrc_operand. */
/* NOTE(review): the body of system_reg_operand (its REGNO switch) is
   on elided lines.  */
6194 system_reg_operand (op, mode)
6196 enum machine_mode mode ATTRIBUTE_UNUSED;
6208 /* Returns 1 if OP can be source of a simple move operation.
6209 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6210 invalid as are subregs of system registers. */
6213 general_movsrc_operand (op, mode)
6215 enum machine_mode mode;
6217 if (GET_CODE (op) == MEM)
6219 rtx inside = XEXP (op, 0);
6220 if (GET_CODE (inside) == CONST)
6221 inside = XEXP (inside, 0);
/* Accept (mem (label_ref)) and (mem (label_ref + const_int)) --
   constant-pool references.  */
6223 if (GET_CODE (inside) == LABEL_REF)
6226 if (GET_CODE (inside) == PLUS
6227 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6228 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6231 /* Only post inc allowed. */
6232 if (GET_CODE (inside) == PRE_DEC)
/* Reject QI/HImode subregs of system registers as move sources.  */
6236 if ((mode == QImode || mode == HImode)
6237 && (GET_CODE (op) == SUBREG
6238 && GET_CODE (XEXP (op, 0)) == REG
6239 && system_reg_operand (XEXP (op, 0), mode)))
6242 return general_operand (op, mode);
6245 /* Returns 1 if OP can be a destination of a move.
6246 Same as general_operand, but no preinc allowed. */
6249 general_movdst_operand (op, mode)
6251 enum machine_mode mode;
6253 /* Only pre dec allowed. */
6254 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6257 return general_operand (op, mode);
6260 /* Returns 1 if OP is a normal arithmetic register. */
6263 arith_reg_operand (op, mode)
6265 enum machine_mode mode;
6267 if (register_operand (op, mode))
/* Extract the hard/pseudo regno from a REG or a SUBREG of a REG;
   the pseudo-register acceptance path is on elided lines.  */
6271 if (GET_CODE (op) == REG)
6273 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6274 regno = REGNO (SUBREG_REG (op));
/* Exclude special registers that ordinary arithmetic insns cannot
   use: T, PR, branch-target regs, MACH/MACL, and FPUL pre-SH4.  */
6278 return (regno != T_REG && regno != PR_REG
6279 && ! TARGET_REGISTER_P (regno)
6280 && (regno != FPUL_REG || TARGET_SH4)
6281 && regno != MACH_REG && regno != MACL_REG);
6286 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6287 because this would lead to missing sign extensions when truncating from
6288 DImode to SImode. */
6290 arith_reg_dest (op, mode)
6292 enum machine_mode mode;
6294 if (mode == DImode && GET_CODE (op) == SUBREG
6295 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6297 return arith_reg_operand (op, mode);
/* Nonzero if OP is a sub-word integer general-purpose register
   destination (checked only after reload).  */
6301 int_gpr_dest (op, mode)
6303 enum machine_mode mode ATTRIBUTE_UNUSED;
6305 enum machine_mode op_mode = GET_MODE (op);
6307 if (GET_MODE_CLASS (op_mode) != MODE_INT
6308 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6310 if (! reload_completed)
6312 return true_regnum (op) <= LAST_GENERAL_REG;
/* Nonzero if OP is a floating-point register (or a pseudo before
   register allocation).  */
6316 fp_arith_reg_operand (op, mode)
6318 enum machine_mode mode;
6320 if (register_operand (op, mode))
6324 if (GET_CODE (op) == REG)
6326 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6327 regno = REGNO (SUBREG_REG (op));
6331 return (regno >= FIRST_PSEUDO_REGISTER
6332 || FP_REGISTER_P (regno));
6337 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6340 arith_operand (op, mode)
6342 enum machine_mode mode;
6344 if (arith_reg_operand (op, mode))
/* NOTE(review): the TARGET_SHMEDIA guard that selects between these
   two constant checks is on elided lines.  */
6349 /* FIXME: We should be checking whether the CONST_INT fits in a
6350 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6351 attempting to transform a sequence of two 64-bit sets of the
6352 same register from literal constants into a set and an add,
6353 when the difference is too wide for an add. */
6354 if (GET_CODE (op) == CONST_INT
6355 || EXTRA_CONSTRAINT_S (op))
6360 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6366 /* Returns 1 if OP is a valid source operand for a compare insn. */
6369 arith_reg_or_0_operand (op, mode)
6371 enum machine_mode mode;
6373 if (arith_reg_operand (op, mode))
6376 if (EXTRA_CONSTRAINT_U (op))
6382 /* Return 1 if OP is a valid source operand for an SHmedia operation
6383 that takes either a register or a 6-bit immediate. */
6386 shmedia_6bit_operand (op, mode)
6388 enum machine_mode mode;
6390 return (arith_reg_operand (op, mode)
6391 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6394 /* Returns 1 if OP is a valid source operand for a logical operation. */
6397 logical_operand (op, mode)
6399 enum machine_mode mode;
6401 if (arith_reg_operand (op, mode))
6406 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6411 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
/* logical_operand, plus SHmedia mshflo.l/mshflhi.l mask constants.  */
6418 and_operand (op, mode)
6420 enum machine_mode mode;
6422 if (logical_operand (op, mode))
6425 /* Check mshflo.l / mshflhi.l opportunities. */
6428 && GET_CODE (op) == CONST_INT
/* NOTE(review): "(HOST_WIDE_INT) -1 << 32" left-shifts a negative
   value -- undefined behavior in ISO C; relies on GCC's own
   arithmetic-shift semantics.  Consider shifting an unsigned value.  */
6429 && (INTVAL (op) == (unsigned) 0xffffffff
6430 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6436 /* Nonzero if OP is a floating point value with value 0.0. */
/* Note: explicitly rejects -0.0 via REAL_VALUE_MINUS_ZERO, since fldi0
   only materializes +0.0.  */
6439 fp_zero_operand (op)
6444 if (GET_MODE (op) != SFmode)
6447 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6448 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6451 /* Nonzero if OP is a floating point value with value 1.0. */
/* NOTE(review): this second function's header (presumably
   fp_one_operand) is on an elided line.  */
6459 if (GET_MODE (op) != SFmode)
6462 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6463 return REAL_VALUES_EQUAL (r, dconst1);
6466 /* For -m4 and -m4-single-only, mode switching is used. If we are
6467 compiling without -mfmovd, movsf_ie isn't taken into account for
6468 mode switching. We could check in machine_dependent_reorg for
6469 cases where we know we are in single precision mode, but there is
6470 interface to find that out during reload, so we must avoid
6471 choosing an fldi alternative during reload and thus failing to
6472 allocate a scratch register for the constant loading. */
6476 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
/* Nonzero for operands needing a tertiary reload: memory, or (on SH4)
   constant doubles.  */
6480 tertiary_reload_operand (op, mode)
6482 enum machine_mode mode ATTRIBUTE_UNUSED;
6484 enum rtx_code code = GET_CODE (op);
6485 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Matches exactly the FPSCR hard register in PSImode.  */
6489 fpscr_operand (op, mode)
6491 enum machine_mode mode ATTRIBUTE_UNUSED;
6493 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6494 && GET_MODE (op) == PSImode);
/* FPUL (or a pseudo) in MODE; elided lines presumably handle the
   SH4/FPU case via fp_arith_reg_operand.  */
6498 fpul_operand (op, mode)
6500 enum machine_mode mode;
6503 return fp_arith_reg_operand (op, mode);
6505 return (GET_CODE (op) == REG
6506 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6507 && GET_MODE (op) == mode);
6511 symbol_ref_operand (op, mode)
6513 enum machine_mode mode ATTRIBUTE_UNUSED;
6515 return (GET_CODE (op) == SYMBOL_REF);
6518 /* Return the TLS type for TLS symbols, 0 for otherwise. */
/* NOTE(review): the dispatch on the TLS-model encoding character is on
   elided lines; only the four returned models are visible here.  */
6520 tls_symbolic_operand (op, mode)
6522 enum machine_mode mode ATTRIBUTE_UNUSED;
6526 if (GET_CODE (op) != SYMBOL_REF)
6530 STRIP_DATALABEL_ENCODING(str, str);
6531 if (! TLS_SYMNAME_P (str))
6537 return TLS_MODEL_GLOBAL_DYNAMIC;
6539 return TLS_MODEL_LOCAL_DYNAMIC;
6541 return TLS_MODEL_INITIAL_EXEC;
6543 return TLS_MODEL_LOCAL_EXEC;
/* RTX-code predicates for float and comparison operators.  Every
   switch body below is on elided lines, so the accepted codes cannot
   be confirmed from this excerpt.  */
6549 commutative_float_operator (op, mode)
6551 enum machine_mode mode;
6553 if (GET_MODE (op) != mode)
6555 switch (GET_CODE (op))
6567 noncommutative_float_operator (op, mode)
6569 enum machine_mode mode;
6571 if (GET_MODE (op) != mode)
6573 switch (GET_CODE (op))
6585 unary_float_operator (op, mode)
6587 enum machine_mode mode;
6589 if (GET_MODE (op) != mode)
6591 switch (GET_CODE (op))
6604 binary_float_operator (op, mode)
6606 enum machine_mode mode;
6608 if (GET_MODE (op) != mode)
6610 switch (GET_CODE (op))
6624 binary_logical_operator (op, mode)
6626 enum machine_mode mode;
6628 if (GET_MODE (op) != mode)
6630 switch (GET_CODE (op))
6643 equality_comparison_operator (op, mode)
6645 enum machine_mode mode;
6647 return ((mode == VOIDmode || GET_MODE (op) == mode)
6648 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
/* NOTE(review): the mode test below ("mode != VOIDmode && ...") has
   the opposite polarity to equality_comparison_operator above; with
   the switch bodies elided, whether this is intentional cannot be
   confirmed here -- check against the full file.  */
6651 int greater_comparison_operator (op, mode)
6653 enum machine_mode mode;
6655 if (mode != VOIDmode && GET_MODE (op) == mode)
6657 switch (GET_CODE (op))
6669 int less_comparison_operator (op, mode)
6671 enum machine_mode mode;
6673 if (mode != VOIDmode && GET_MODE (op) == mode)
6675 switch (GET_CODE (op))
6687 /* Accept pseudos and branch target registers. */
6689 target_reg_operand (op, mode)
6691 enum machine_mode mode;
/* NOTE(review): the first half of this condition (presumably a mode
   check against MODE) is on an elided line.  */
6694 || GET_MODE (op) != DImode)
6697 if (GET_CODE (op) == SUBREG)
6700 if (GET_CODE (op) != REG)
6703 /* We must protect ourselves from matching pseudos that are virtual
6704 register, because they will eventually be replaced with hardware
6705 registers that aren't branch-target registers. */
6706 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6707 || TARGET_REGISTER_P (REGNO (op)))
6713 /* Same as target_reg_operand, except that label_refs and symbol_refs
6714 are accepted before reload. */
6716 target_operand (op, mode)
6718 enum machine_mode mode;
6723 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6724 && EXTRA_CONSTRAINT_T (op))
6725 return ! reload_completed;
6727 return target_reg_operand (op, mode);
/* Nonzero for a CONST_INT that is a byte-aligned mextr shift amount:
   a multiple of 8 in [8, 56] bits.  */
6731 mextr_bit_offset (op, mode)
6733 enum machine_mode mode ATTRIBUTE_UNUSED;
6737 if (GET_CODE (op) != CONST_INT)
6740 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
/* The three extend predicates below dispatch on whether OP is a
   TRUNCATE; the TRUNCATE-branch predicate name is on elided lines.  */
6744 extend_reg_operand (op, mode)
6746 enum machine_mode mode;
6748 return (GET_CODE (op) == TRUNCATE
6750 : arith_reg_operand) (op, mode);
6754 trunc_hi_operand (op, mode)
6756 enum machine_mode mode;
6758 enum machine_mode op_mode = GET_MODE (op);
6760 if (op_mode != SImode && op_mode != DImode
6761 && op_mode != V4HImode && op_mode != V2SImode)
6763 return extend_reg_operand (op, mode);
6767 extend_reg_or_0_operand (op, mode)
6769 enum machine_mode mode;
6771 return (GET_CODE (op) == TRUNCATE
6773 : arith_reg_or_0_operand) (op, mode);
6777 general_extend_operand (op, mode)
6779 enum machine_mode mode;
6781 return (GET_CODE (op) == TRUNCATE
6783 : nonimmediate_operand) (op, mode);
/* Nonzero for a TRUNCATE of an FP hard register in MODE.  */
6787 inqhi_operand (op, mode)
6789 enum machine_mode mode;
6791 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
/* NOTE(review): an elided line here presumably re-binds op to
   XEXP (op, 0) before the REG check below.  */
6794 /* Can't use true_regnum here because copy_cost wants to know about
6795 SECONDARY_INPUT_RELOAD_CLASS. */
6796 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
/* Nonzero if vector V consists of a single repeated element (for
   byte vectors, a repeated pair of elements).  */
6800 sh_rep_vec (v, mode)
6802 enum machine_mode mode;
6807 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6808 || (GET_MODE (v) != mode && mode != VOIDmode))
6810 i = XVECLEN (v, 0) - 2;
6811 x = XVECEXP (v, 0, i + 1);
6812 if (GET_MODE_UNIT_SIZE (mode) == 1)
/* Byte elements: compare the trailing (y, x) pair against every
   preceding pair.  */
6814 y = XVECEXP (v, 0, i);
6815 for (i -= 2 ; i >= 0; i -= 2)
6816 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6817 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6822 if (XVECEXP (v, 0, i) != x)
6827 /* Determine if V is a constant vector matching MODE with only one element
6828 that is not a sign extension. Two byte-sized elements count as one. */
6830 sh_1el_vec (v, mode)
6832 enum machine_mode mode;
6835 int i, last, least, sign_ix;
6838 if (GET_CODE (v) != CONST_VECTOR
6839 || (GET_MODE (v) != mode && mode != VOIDmode))
6841 /* Determine numbers of last and of least significant elements. */
6842 last = XVECLEN (v, 0) - 1;
6843 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6844 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
/* sign_ix selects the element whose sign decides whether the rest of
   the vector must be all-zeros or all-ones; the non-byte-mode default
   assignment is on an elided line.  */
6847 if (GET_MODE_UNIT_SIZE (mode) == 1)
6848 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6849 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6851 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6852 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6853 ? constm1_rtx : const0_rtx);
6854 i = XVECLEN (v, 0) - 1;
6856 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
/* Nonzero if V is a CONST_VECTOR of CONST_INTs matching MODE.  */
6863 sh_const_vec (v, mode)
6865 enum machine_mode mode;
6869 if (GET_CODE (v) != CONST_VECTOR
6870 || (GET_MODE (v) != mode && mode != VOIDmode))
6872 i = XVECLEN (v, 0) - 1;
6874 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6879 /* Return the destination address of a branch. */
6882 branch_dest (branch)
6885 rtx dest = SET_SRC (PATTERN (branch));
6888 if (GET_CODE (dest) == IF_THEN_ELSE)
6889 dest = XEXP (dest, 1);
6890 dest = XEXP (dest, 0);
6891 dest_uid = INSN_UID (dest);
6892 return INSN_ADDRESSES (dest_uid);
6895 /* Return nonzero if REG is not used after INSN.
6896 We assume REG is a reload reg, and therefore does
6897 not live past labels. It may live past calls or jumps though. */
6899 reg_unused_after (reg, insn)
6906 /* If the reg is set by this instruction, then it is safe for our
6907 case. Disregard the case where this is a store to memory, since
6908 we are checking a register used in the store address. */
6909 set = single_set (insn);
6910 if (set && GET_CODE (SET_DEST (set)) != MEM
6911 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6914 while ((insn = NEXT_INSN (insn)))
6916 code = GET_CODE (insn);
6919 /* If this is a label that existed before reload, then the register
6920 if dead here. However, if this is a label added by reorg, then
6921 the register may still be live here. We can't tell the difference,
6922 so we just ignore labels completely. */
6923 if (code == CODE_LABEL)
6928 if (code == JUMP_INSN)
6931 /* If this is a sequence, we must handle them all at once.
6932 We could have for instance a call that sets the target register,
6933 and an insn in a delay slot that uses the register. In this case,
6934 we must return 0. */
6935 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6940 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6942 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6943 rtx set = single_set (this_insn);
6945 if (GET_CODE (this_insn) == CALL_INSN)
6947 else if (GET_CODE (this_insn) == JUMP_INSN)
6949 if (INSN_ANNULLED_BRANCH_P (this_insn))
6954 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6956 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6958 if (GET_CODE (SET_DEST (set)) != MEM)
6964 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6969 else if (code == JUMP_INSN)
6972 else if (GET_RTX_CLASS (code) == 'i')
6974 rtx set = single_set (insn);
6976 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6978 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6979 return GET_CODE (SET_DEST (set)) != MEM;
6980 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6984 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6992 static GTY(()) rtx fpscr_rtx;
6998 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
6999 REG_USERVAR_P (fpscr_rtx) = 1;
7000 mark_user_reg (fpscr_rtx);
7002 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7003 mark_user_reg (fpscr_rtx);
7022 expand_sf_unop (fun, operands)
7023 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7026 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7030 expand_sf_binop (fun, operands)
7031 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7034 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7039 expand_df_unop (fun, operands)
7040 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7043 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7047 expand_df_binop (fun, operands)
7048 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7051 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7055 /* ??? gcc does flow analysis strictly after common subexpression
7056 elimination. As a result, common subexpression elimination fails
7057 when there are some intervening statements setting the same register.
7058 If we did nothing about this, this would hurt the precision switching
7059 for SH4 badly. There is some cse after reload, but it is unable to
7060 undo the extra register pressure from the unused instructions, and
7061 it cannot remove auto-increment loads.
7063 A C code example that shows this flow/cse weakness for (at least) SH
7064 and sparc (as of gcc ss-970706) is this:
7078 So we add another pass before common subexpression elimination, to
7079 remove assignments that are dead due to a following assignment in the
7080 same basic block. */
7083 mark_use (x, reg_set_block)
7084 rtx x, *reg_set_block;
7090 code = GET_CODE (x);
7095 int regno = REGNO (x);
7096 int nregs = (regno < FIRST_PSEUDO_REGISTER
7097 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7101 reg_set_block[regno + nregs - 1] = 0;
7108 rtx dest = SET_DEST (x);
7110 if (GET_CODE (dest) == SUBREG)
7111 dest = SUBREG_REG (dest);
7112 if (GET_CODE (dest) != REG)
7113 mark_use (dest, reg_set_block);
7114 mark_use (SET_SRC (x), reg_set_block);
7121 const char *fmt = GET_RTX_FORMAT (code);
7123 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7126 mark_use (XEXP (x, i), reg_set_block);
7127 else if (fmt[i] == 'E')
7128 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7129 mark_use (XVECEXP (x, i, j), reg_set_block);
7136 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7138 /* This function returns a register to use to load the address to load
7139 the fpscr from. Currently it always returns r1 or r7, but when we are
7140 able to use pseudo registers after combine, or have a better mechanism
7141 for choosing a register, it should be done here. */
7142 /* REGS_LIVE is the liveness information for the point for which we
7143 need this allocation. In some bare-bones exit blocks, r1 is live at the
7144 start. We can even have all of r0..r3 being live:
7145 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7146 INSN before which new insns are placed with will clobber the register
7147 we return. If a basic block consists only of setting the return value
7148 register to a pseudo and using that register, the return value is not
7149 live before or after this block, yet we we'll insert our insns right in
7153 get_free_reg (regs_live)
7154 HARD_REG_SET regs_live;
7156 if (! TEST_HARD_REG_BIT (regs_live, 1))
7157 return gen_rtx_REG (Pmode, 1);
7159 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7160 there shouldn't be anything but a jump before the function end. */
7161 if (! TEST_HARD_REG_BIT (regs_live, 7))
7162 return gen_rtx_REG (Pmode, 7);
7167 /* This function will set the fpscr from memory.
7168 MODE is the mode we are setting it to. */
7170 fpscr_set_from_mem (mode, regs_live)
7172 HARD_REG_SET regs_live;
7174 enum attr_fp_mode fp_mode = mode;
7175 rtx addr_reg = get_free_reg (regs_live);
7177 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
7178 emit_insn (gen_fpu_switch1 (addr_reg));
7180 emit_insn (gen_fpu_switch0 (addr_reg));
7183 /* Is the given character a logical line separator for the assembler? */
7184 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7185 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7189 sh_insn_length_adjustment (insn)
7192 /* Instructions with unfilled delay slots take up an extra two bytes for
7193 the nop in the delay slot. */
7194 if (((GET_CODE (insn) == INSN
7195 && GET_CODE (PATTERN (insn)) != USE
7196 && GET_CODE (PATTERN (insn)) != CLOBBER)
7197 || GET_CODE (insn) == CALL_INSN
7198 || (GET_CODE (insn) == JUMP_INSN
7199 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7200 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7201 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7202 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7205 /* SH2e has a bug that prevents the use of annulled branches, so if
7206 the delay slot is not filled, we'll have to put a NOP in it. */
7207 if (sh_cpu == CPU_SH2E
7208 && GET_CODE (insn) == JUMP_INSN
7209 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7210 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7211 && get_attr_type (insn) == TYPE_CBRANCH
7212 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7215 /* sh-dsp parallel processing insn take four bytes instead of two. */
7217 if (GET_CODE (insn) == INSN)
7220 rtx body = PATTERN (insn);
7221 const char *template;
7223 int maybe_label = 1;
7225 if (GET_CODE (body) == ASM_INPUT)
7226 template = XSTR (body, 0);
7227 else if (asm_noperands (body) >= 0)
7229 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7238 while (c == ' ' || c == '\t');
7239 /* all sh-dsp parallel-processing insns start with p.
7240 The only non-ppi sh insn starting with p is pref.
7241 The only ppi starting with pr is prnd. */
7242 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7244 /* The repeat pseudo-insn expands two three insns, a total of
7245 six bytes in size. */
7246 else if ((c == 'r' || c == 'R')
7247 && ! strncasecmp ("epeat", template, 5))
7249 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7251 /* If this is a label, it is obviously not a ppi insn. */
7252 if (c == ':' && maybe_label)
7257 else if (c == '\'' || c == '"')
7262 maybe_label = c != ':';
7270 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7271 isn't protected by a PIC unspec. */
7273 nonpic_symbol_mentioned_p (x)
7276 register const char *fmt;
7279 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7280 || GET_CODE (x) == PC)
7283 /* We don't want to look into the possible MEM location of a
7284 CONST_DOUBLE, since we're not going to use it, in general. */
7285 if (GET_CODE (x) == CONST_DOUBLE)
7288 if (GET_CODE (x) == UNSPEC
7289 && (XINT (x, 1) == UNSPEC_PIC
7290 || XINT (x, 1) == UNSPEC_GOT
7291 || XINT (x, 1) == UNSPEC_GOTOFF
7292 || XINT (x, 1) == UNSPEC_GOTPLT
7293 || XINT (x, 1) == UNSPEC_GOTTPOFF
7294 || XINT (x, 1) == UNSPEC_DTPOFF
7295 || XINT (x, 1) == UNSPEC_PLT))
7298 fmt = GET_RTX_FORMAT (GET_CODE (x));
7299 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7305 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7306 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7309 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7316 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7317 @GOTOFF in `reg'. */
7319 legitimize_pic_address (orig, mode, reg)
7321 enum machine_mode mode ATTRIBUTE_UNUSED;
7324 if (tls_symbolic_operand (orig, Pmode))
7327 if (GET_CODE (orig) == LABEL_REF
7328 || (GET_CODE (orig) == SYMBOL_REF
7329 && (CONSTANT_POOL_ADDRESS_P (orig)
7330 /* SYMBOL_REF_FLAG is set on static symbols. */
7331 || SYMBOL_REF_FLAG (orig))))
7334 reg = gen_reg_rtx (Pmode);
7336 emit_insn (gen_symGOTOFF2reg (reg, orig));
7339 else if (GET_CODE (orig) == SYMBOL_REF)
7342 reg = gen_reg_rtx (Pmode);
7344 emit_insn (gen_symGOT2reg (reg, orig));
7350 /* Mark the use of a constant in the literal table. If the constant
7351 has multiple labels, make it unique. */
7353 mark_constant_pool_use (x)
7356 rtx insn, lab, pattern;
7361 switch (GET_CODE (x))
7371 /* Get the first label in the list of labels for the same constant
7372 and delete another labels in the list. */
7374 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7376 if (GET_CODE (insn) != CODE_LABEL
7377 || LABEL_REFS (insn) != NEXT_INSN (insn))
7382 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7383 INSN_DELETED_P (insn) = 1;
7385 /* Mark constants in a window. */
7386 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7388 if (GET_CODE (insn) != INSN)
7391 pattern = PATTERN (insn);
7392 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7395 switch (XINT (pattern, 1))
7397 case UNSPECV_CONST2:
7398 case UNSPECV_CONST4:
7399 case UNSPECV_CONST8:
7400 XVECEXP (pattern, 0, 1) = const1_rtx;
7402 case UNSPECV_WINDOW_END:
7403 if (XVECEXP (pattern, 0, 0) == x)
7406 case UNSPECV_CONST_END:
7416 /* Return true if it's possible to redirect BRANCH1 to the destination
7417 of an unconditional jump BRANCH2. We only want to do this if the
7418 resulting branch will have a short displacement. */
7420 sh_can_redirect_branch (branch1, branch2)
7424 if (flag_expensive_optimizations && simplejump_p (branch2))
7426 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7430 for (distance = 0, insn = NEXT_INSN (branch1);
7431 insn && distance < 256;
7432 insn = PREV_INSN (insn))
7437 distance += get_attr_length (insn);
7439 for (distance = 0, insn = NEXT_INSN (branch1);
7440 insn && distance < 256;
7441 insn = NEXT_INSN (insn))
7446 distance += get_attr_length (insn);
7452 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7454 sh_hard_regno_rename_ok (old_reg, new_reg)
7455 unsigned int old_reg ATTRIBUTE_UNUSED;
7456 unsigned int new_reg;
7459 /* Interrupt functions can only use registers that have already been
7460 saved by the prologue, even if they would normally be
7463 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7469 /* Function to update the integer COST
7470 based on the relationship between INSN that is dependent on
7471 DEP_INSN through the dependence LINK. The default is to make no
7472 adjustment to COST. This can be used for example to specify to
7473 the scheduler that an output- or anti-dependence does not incur
7474 the same cost as a data-dependence. The return value should be
7475 the new value for COST. */
7477 sh_adjust_cost (insn, link, dep_insn, cost)
7479 rtx link ATTRIBUTE_UNUSED;
7487 /* On SHmedia, if the dependence is an anti-dependence or
7488 output-dependence, there is no cost. */
7489 if (REG_NOTE_KIND (link) != 0)
7492 if (get_attr_is_mac_media (insn)
7493 && get_attr_is_mac_media (dep_insn))
7496 else if (REG_NOTE_KIND (link) == 0)
7498 enum attr_type dep_type, type;
7500 if (recog_memoized (insn) < 0
7501 || recog_memoized (dep_insn) < 0)
7504 dep_type = get_attr_type (dep_insn);
7505 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7507 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7508 && (type = get_attr_type (insn)) != TYPE_CALL
7509 && type != TYPE_SFUNC)
7512 /* The only input for a call that is timing-critical is the
7513 function's address. */
7514 if (GET_CODE(insn) == CALL_INSN)
7516 rtx call = PATTERN (insn);
7518 if (GET_CODE (call) == PARALLEL)
7519 call = XVECEXP (call, 0 ,0);
7520 if (GET_CODE (call) == SET)
7521 call = SET_SRC (call);
7522 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7523 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7526 /* Likewise, the most timing critical input for an sfuncs call
7527 is the function address. However, sfuncs typically start
7528 using their arguments pretty quickly.
7529 Assume a four cycle delay before they are needed. */
7530 /* All sfunc calls are parallels with at least four components.
7531 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7532 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7533 && XVECLEN (PATTERN (insn), 0) >= 4
7534 && (reg = sfunc_uses_reg (insn)))
7536 if (! reg_set_p (reg, dep_insn))
7539 /* When the preceding instruction loads the shift amount of
7540 the following SHAD/SHLD, the latency of the load is increased
7543 && get_attr_type (insn) == TYPE_DYN_SHIFT
7544 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7545 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7546 XEXP (SET_SRC (single_set(insn)),
7549 /* When an LS group instruction with a latency of less than
7550 3 cycles is followed by a double-precision floating-point
7551 instruction, FIPR, or FTRV, the latency of the first
7552 instruction is increased to 3 cycles. */
7554 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7555 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7557 /* The lsw register of a double-precision computation is ready one
7559 else if (reload_completed
7560 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7561 && (use_pat = single_set (insn))
7562 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7566 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7567 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7570 /* An anti-dependence penalty of two applies if the first insn is a double
7571 precision fadd / fsub / fmul. */
7572 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7573 && recog_memoized (dep_insn) >= 0
7574 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7575 /* A lot of alleged anti-flow dependences are fake,
7576 so check this one is real. */
7577 && flow_dependent_p (dep_insn, insn))
7584 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7585 if DEP_INSN is anti-flow dependent on INSN. */
7587 flow_dependent_p (insn, dep_insn)
7590 rtx tmp = PATTERN (insn);
7592 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7593 return tmp == NULL_RTX;
7596 /* A helper function for flow_dependent_p called through note_stores. */
7598 flow_dependent_p_1 (x, pat, data)
7600 rtx pat ATTRIBUTE_UNUSED;
7603 rtx * pinsn = (rtx *) data;
7605 if (*pinsn && reg_referenced_p (x, *pinsn))
7609 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7610 'special function' patterns (type sfunc) that clobber pr, but that
7611 do not look like function calls to leaf_function_p. Hence we must
7612 do this extra check. */
7616 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7619 /* This Function returns nonzero if the DFA based scheduler interface
7620 is to be used. At present this is supported for the SH4 only. */
7622 sh_use_dfa_interface()
7624 if (TARGET_HARD_SH4)
7630 /* This function returns "2" to indicate dual issue for the SH4
7631 processor. To be used by the DFA pipeline description. */
7635 if (TARGET_SUPERSCALAR)
7641 /* SHmedia requires registers for branches, so we can't generate new
7642 branches past reload. */
7644 sh_cannot_modify_jumps_p ()
7646 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7650 sh_ms_bitfield_layout_p (record_type)
7651 tree record_type ATTRIBUTE_UNUSED;
7656 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
7657 may access it using GOTOFF instead of GOT. */
7660 sh_encode_section_info (decl, first)
7667 rtl = DECL_RTL (decl);
7669 rtl = TREE_CST_RTL (decl);
7670 if (GET_CODE (rtl) != MEM)
7672 symbol = XEXP (rtl, 0);
7673 if (GET_CODE (symbol) != SYMBOL_REF)
7677 SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
7679 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7681 const char *symbol_str, *orig_str;
7683 enum tls_model kind;
7688 orig_str = XSTR (symbol, 0);
7689 is_local = (*targetm.binds_local_p) (decl);
7694 kind = TLS_MODEL_LOCAL_EXEC;
7696 kind = TLS_MODEL_INITIAL_EXEC;
7699 kind = TLS_MODEL_LOCAL_DYNAMIC;
7701 kind = TLS_MODEL_GLOBAL_DYNAMIC;
7702 if (kind < flag_tls_default)
7703 kind = flag_tls_default;
7705 STRIP_DATALABEL_ENCODING (symbol_str, orig_str);
7706 dlen = symbol_str - orig_str;
7708 encoding = " GLil"[kind];
7709 if (TLS_SYMNAME_P (symbol_str))
7711 if (encoding == symbol_str[1])
7713 /* Handle the changes from initial-exec to local-exec and
7714 from global-dynamic to local-dynamic. */
7715 if ((encoding == 'l' && symbol_str[1] == 'i')
7716 || (encoding == 'L' && symbol_str[1] == 'G'))
7722 len = strlen (symbol_str);
7723 newstr = alloca (dlen + len + 3);
7725 memcpy (newstr, orig_str, dlen);
7726 newstr[dlen + 0] = SH_TLS_ENCODING[0];
7727 newstr[dlen + 1] = encoding;
7728 memcpy (newstr + dlen + 2, symbol_str, len + 1);
7730 XSTR (symbol, 0) = ggc_alloc_string (newstr, dlen + len + 2);
7733 if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
7734 XEXP (rtl, 0) = gen_datalabel_ref (symbol);
7737 /* Undo the effects of the above. */
7740 sh_strip_name_encoding (str)
7743 STRIP_DATALABEL_ENCODING (str, str);
7744 STRIP_TLS_ENCODING (str, str);
7751 On the SH1..SH4, the trampoline looks like
7752 2 0002 D202 mov.l l2,r2
7753 1 0000 D301 mov.l l1,r3
7756 5 0008 00000000 l1: .long area
7757 6 000c 00000000 l2: .long function
7759 SH5 (compact) uses r1 instead of r3 for the static chain. */
7762 /* Emit RTL insns to initialize the variable parts of a trampoline.
7763 FNADDR is an RTX for the address of the function's pure code.
7764 CXT is an RTX for the static chain value for the function. */
7767 sh_initialize_trampoline (tramp, fnaddr, cxt)
7768 rtx tramp, fnaddr, cxt;
7770 if (TARGET_SHMEDIA64)
7775 rtx movi1 = GEN_INT (0xcc000010);
7776 rtx shori1 = GEN_INT (0xc8000010);
7779 /* The following trampoline works within a +- 128 KB range for cxt:
7780 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7781 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7782 gettr tr1,r1; blink tr0,r63 */
7783 /* Address rounding makes it hard to compute the exact bounds of the
7784 offset for this trampoline, but we have a rather generous offset
7785 range, so frame_offset should do fine as an upper bound. */
7786 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7788 /* ??? could optimize this trampoline initialization
7789 by writing DImode words with two insns each. */
7790 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7791 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7792 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7793 insn = gen_rtx_AND (DImode, insn, mask);
7794 /* Or in ptb/u .,tr1 pattern */
7795 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7796 insn = force_operand (insn, NULL_RTX);
7797 insn = gen_lowpart (SImode, insn);
7798 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7799 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7800 insn = gen_rtx_AND (DImode, insn, mask);
7801 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7802 insn = gen_lowpart (SImode, insn);
7803 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7804 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7805 insn = gen_rtx_AND (DImode, insn, mask);
7806 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7807 insn = gen_lowpart (SImode, insn);
7808 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7809 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7810 insn = gen_rtx_AND (DImode, insn, mask);
7811 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7812 insn = gen_lowpart (SImode, insn);
7813 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7815 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7816 insn = gen_rtx_AND (DImode, insn, mask);
7817 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7818 insn = gen_lowpart (SImode, insn);
7819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7821 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7822 GEN_INT (0x6bf10600));
7823 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7824 GEN_INT (0x4415fc10));
7825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7826 GEN_INT (0x4401fff0));
7827 emit_insn (gen_ic_invalidate_line (tramp));
7830 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7831 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7833 tramp_templ = gen_datalabel_ref (tramp_templ);
7834 dst = gen_rtx_MEM (BLKmode, tramp);
7835 src = gen_rtx_MEM (BLKmode, tramp_templ);
7836 set_mem_align (dst, 256);
7837 set_mem_align (src, 64);
7838 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7840 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7842 emit_move_insn (gen_rtx_MEM (Pmode,
7843 plus_constant (tramp,
7845 + GET_MODE_SIZE (Pmode))),
7847 emit_insn (gen_ic_invalidate_line (tramp));
7850 else if (TARGET_SHMEDIA)
7852 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7853 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7854 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7855 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7856 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7857 rotated 10 right, and higher 16 bit of every 32 selected. */
7859 = force_reg (V2HImode, (simplify_gen_subreg
7860 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7861 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7862 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7864 tramp = force_reg (Pmode, tramp);
7865 fnaddr = force_reg (SImode, fnaddr);
7866 cxt = force_reg (SImode, cxt);
7867 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7868 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7870 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7871 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7872 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7873 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7874 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7875 gen_rtx_SUBREG (V2HImode, cxt, 0),
7877 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7878 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7879 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7880 if (TARGET_LITTLE_ENDIAN)
7882 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7883 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7887 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7888 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7890 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7891 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7892 emit_insn (gen_ic_invalidate_line (tramp));
7895 else if (TARGET_SHCOMPACT)
7897 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7900 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7901 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7903 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7904 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7906 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7908 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7912 if (TARGET_USERMODE)
7913 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
7914 0, VOIDmode, 1, tramp, SImode);
7916 emit_insn (gen_ic_invalidate_line (tramp));
7920 /* FIXME: This is overly conservative. A SHcompact function that
7921 receives arguments ``by reference'' will have them stored in its
7922 own stack frame, so it must not pass pointers or references to
7923 these arguments to other functions by means of sibling calls. */
7925 sh_function_ok_for_sibcall (decl, exp)
7927 tree exp ATTRIBUTE_UNUSED;
7930 && (! TARGET_SHCOMPACT
7931 || current_function_args_info.stack_regs == 0));
7934 /* Machine specific built-in functions. */
7936 struct builtin_description
7938 const enum insn_code icode;
7939 const char *const name;
7943 /* describe number and signedness of arguments; arg[0] == result
7944 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
7945 static const char signature_args[][4] =
7947 #define SH_BLTIN_V2SI2 0
7949 #define SH_BLTIN_V4HI2 1
7951 #define SH_BLTIN_V2SI3 2
7953 #define SH_BLTIN_V4HI3 3
7955 #define SH_BLTIN_V8QI3 4
7957 #define SH_BLTIN_MAC_HISI 5
7959 #define SH_BLTIN_SH_HI 6
7961 #define SH_BLTIN_SH_SI 7
7963 #define SH_BLTIN_V4HI2V2SI 8
7965 #define SH_BLTIN_V4HI2V8QI 9
7967 #define SH_BLTIN_SISF 10
7969 #define SH_BLTIN_LDUA_L 11
7971 #define SH_BLTIN_LDUA_Q 12
7973 #define SH_BLTIN_STUA_L 13
7975 #define SH_BLTIN_STUA_Q 14
7977 #define SH_BLTIN_UDI 15
7979 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7980 #define SH_BLTIN_2 16
7981 #define SH_BLTIN_SU 16
7983 #define SH_BLTIN_3 17
7984 #define SH_BLTIN_SUS 17
7986 #define SH_BLTIN_PSSV 18
7988 #define SH_BLTIN_XXUU 19
7989 #define SH_BLTIN_UUUU 19
7991 #define SH_BLTIN_PV 20
7994 /* mcmv: operands considered unsigned. */
7995 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7996 /* mperm: control value considered unsigned int. */
7997 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7998 /* mshards_q: returns signed short. */
7999 /* nsb: takes long long arg, returns unsigned char. */
8000 static const struct builtin_description bdesc[] =
8002 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
8003 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
8004 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
8005 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
8006 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
8007 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
8008 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
8010 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8011 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8013 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
8014 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
8015 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
8016 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
8017 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
8018 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
8019 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
8020 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
8021 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
8022 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
8023 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
8024 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
8025 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
8026 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
8027 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
8028 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
8029 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
8030 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
8031 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
8032 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
8033 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
8034 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
8035 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
8036 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
8037 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
8038 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
8039 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
8040 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
8041 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
8042 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
8043 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
8044 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
8045 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
8046 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
8047 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
8048 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
8049 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
8050 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
8051 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
8052 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
8053 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
8054 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
8055 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
8056 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
8057 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
8058 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
8059 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
8060 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
8061 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
8062 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
8063 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
8064 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
8065 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
8066 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
8068 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8069 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8070 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8071 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8072 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8073 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8074 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8075 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8076 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
8077 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
8078 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
8079 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
8080 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
8081 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
8082 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
8083 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
8085 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
8086 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
8088 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8089 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
/* Register the SHmedia media-instruction builtins described in bdesc[].
   For each entry, a function type is constructed from the entry's
   signature (via signature_args[] and the insn operand modes) and the
   builtin is registered with builtin_function.  Types for signatures
   numbered below SH_BLTIN_NUM_SHARED_SIGNATURES are cached in SHARED[]
   so identical signatures reuse one type node.
   NOTE(review): several lines of this function are elided in this view;
   the comments below describe only the visible code.  */
8094 sh_media_init_builtins ()
8096   tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8097   const struct builtin_description *d;
8099   memset (shared, 0, sizeof shared);
8100   for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8102       tree type, arg_type;
8103       int signature = d->signature;
      /* Reuse a previously built type for a shared signature.  */
8106       if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8107 	type = shared[signature];
      /* Nonzero when the builtin produces a value (slot 0 of the
	 signature describes the result).  */
8110 	  int has_result = signature_args[signature][0] != 0;
      /* Signature code 8 appears to denote a pointer argument; skip
	 insns whose operand mode is not Pmode — TODO(review): confirm
	 against the elided signature_args table.  */
8112 	  if (signature_args[signature][1] == 8
8113 	      && (insn_data[d->icode].operand[has_result].mode != Pmode))
      /* Skip floating-point builtins when no FPU is available.  */
8115 	  if (! TARGET_FPU_ANY
8116 	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
8118 	  type = void_list_node;
8121 	      int arg = signature_args[signature][i];
8122 	      int opno = i - 1 + has_result;
8125 		arg_type = ptr_type_node;
      /* Otherwise derive the argument type from the insn operand mode.  */
8127 		arg_type = ((*lang_hooks.types.type_for_mode)
8128 			    (insn_data[d->icode].operand[opno].mode,
8133 		arg_type = void_type_node;
      /* Prepend each argument type to the TREE_LIST of the signature.  */
8136 	      type = tree_cons (NULL_TREE, arg_type, type);
8138 	  type = build_function_type (arg_type, type);
8139 	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8140 	    shared[signature] = type;
      /* The builtin's function code is its index in bdesc[], so
	 sh_expand_builtin can index back into the table.  */
8142       builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8151 sh_media_init_builtins ();
8154 /* Expand an expression EXP that calls a built-in function,
8155 with result going to TARGET if that's convenient
8156 (and in mode MODE if that's convenient).
8157 SUBTARGET may be used as the target for computing one of EXP's operands.
8158 IGNORE is nonzero if the value is to be ignored. */
/* Expand a call EXP to one of the SH builtins registered above.
   The DECL_FUNCTION_CODE of the callee indexes bdesc[], giving the
   insn code and signature; operands are expanded, mode-converted, and
   fed to the insn's generator function.
   NOTE(review): lines are elided in this view (the switch over operand
   count, error paths, return); comments cover only the visible code.  */
8161 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8164      rtx subtarget ATTRIBUTE_UNUSED;
8165      enum machine_mode mode ATTRIBUTE_UNUSED;
  /* Recover the FUNCTION_DECL from the CALL_EXPR's address operand.  */
8168   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8169   tree arglist = TREE_OPERAND (exp, 1);
8170   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8171   const struct builtin_description *d = &bdesc[fcode];
8172   enum insn_code icode = d->icode;
8173   int signature = d->signature;
8174   enum machine_mode tmode = VOIDmode;
  /* A nonzero slot 0 in the signature means the builtin has a result.  */
8179   if (signature_args[signature][0])
8184 	  tmode = insn_data[icode].operand[0].mode;
      /* Use TARGET only if it has the right mode and satisfies the
	 insn's operand 0 predicate; otherwise take a fresh pseudo.  */
8186 	      || GET_MODE (target) != tmode
8187 	      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8188 	    target = gen_reg_rtx (tmode);
  /* Walk the up-to-three argument slots of the signature.  */
8194   for (i = 1; i <= 3; i++, nop++)
8197       enum machine_mode opmode, argmode;
      /* A zero slot terminates the argument list.  */
8199       if (! signature_args[signature][i])
8201       arg = TREE_VALUE (arglist);
8202       if (arg == error_mark_node)
8204       arglist = TREE_CHAIN (arglist);
8205       opmode = insn_data[icode].operand[nop].mode;
8206       argmode = TYPE_MODE (TREE_TYPE (arg));
      /* Convert the argument tree to the insn operand's mode first, so
	 expand_expr produces an rtx of the right mode.  */
8207       if (argmode != opmode)
8208 	arg = build1 (NOP_EXPR,
8209 		      (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8210       op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      /* Force operands the predicate rejects into a register.  */
8211       if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8212 	op[nop] = copy_to_mode_reg (opmode, op[nop]);
  /* Dispatch on operand count (switch elided in this view).  */
8218       pat = (*insn_data[d->icode].genfun) (op[0]);
8221       pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8224       pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8227       pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Expand a V2SF unary operation CODE, OP0 = CODE (OP1), by emitting
   the scalar SFmode operation once per vector lane: selector rtxes 0
   and 1 pick the lane of source and destination in gen_unary_sf_op.  */
8239 sh_expand_unop_v2sf (code, op0, op1)
8243   rtx sel0 = const0_rtx;
8244   rtx sel1 = const1_rtx;
8245   rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
  /* Build the SFmode unary rtx template once; it is reused for both
     lane expansions.  */
8246   rtx op = gen_rtx_fmt_e (code, SFmode, op1);
  /* Lane 0, then lane 1.  */
8248   emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8249   emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Expand a V2SF binary operation CODE, OP0 = OP1 CODE OP2, one SFmode
   lane at a time.  The extra selector operands of gen_binary_sf_op
   (assigned on an elided line) choose the lanes of the destination and
   the two sources — TODO(review): confirm selector meaning against the
   gen_binary_sf_op pattern in sh.md.  */
8253 sh_expand_binop_v2sf (code, op0, op1, op2)
8257   rtx sel0 = const0_rtx;
8258   rtx sel1 = const1_rtx;
8259   rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
  /* SFmode binary rtx template shared by both lane expansions.  */
8261   rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8263   emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8264   emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8267 /* Return the class of registers for which a mode change from FROM to TO
   is invalid (nonzero when CLASS intersects the problematic class).  */
8270 sh_cannot_change_mode_class (from, to, class)
8271      enum machine_mode from, to;
8272      enum reg_class class;
  /* Only mode changes that alter the size are restricted.  */
8274   if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8276       if (TARGET_LITTLE_ENDIAN)
	  /* Little-endian: any size change crossing the 8-byte boundary
	     is unsafe in double-precision FP registers.  */
8278 	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8279 	    return reg_classes_intersect_p (DF_REGS, class);
      /* Big-endian case (branch structure partly elided in this view):
	 sub-8-byte sources conflict with DF_HI_REGS.  */
8283 	  if (GET_MODE_SIZE (from) < 8)
8284 	    return reg_classes_intersect_p (DF_HI_REGS, class);
8291 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8292    that label is used.  */
8295 sh_mark_label (address, nuses)
  /* A GOTOFF address wraps the label in an UNSPEC, possibly inside a
     PLUS with an offset; unwrap down to the underlying label/symbol.  */
8299   if (GOTOFF_P (address))
8301       /* Extract the label or symbol.  */
8302       address = XEXP (address, 0);
8303       if (GET_CODE (address) == PLUS)
8304 	address = XEXP (address, 0);
8305       address = XVECEXP (address, 0, 0);
  /* Only count references that resolve to an actual CODE_LABEL.  */
8307   if (GET_CODE (address) == LABEL_REF
8308       && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8309     LABEL_NUSES (XEXP (address, 0)) += nuses;
8312 /* Compute extra cost of moving data between one register class
   and another (REGISTER_MOVE_COST for SH).  */
8315 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8316    uses this information.  Hence, the general register <-> floating point
8317    register information here is not used for SFmode.  */
8320 sh_register_move_cost (mode, srcclass, dstclass)
8321      enum machine_mode mode;
8322      enum reg_class srcclass, dstclass;
  /* Moves into T or PR are expensive (cost on an elided line).  */
8324   if (dstclass == T_REGS || dstclass == PR_REGS)
  /* FP<->FP SImode moves with FMOVD need mode switching (cost elided).  */
8327   if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8328       && REGCLASS_HAS_FP_REG (srcclass)
8329       && REGCLASS_HAS_FP_REG (dstclass))
  /* General <-> FP register moves: cost scales with the number of
     8-byte chunks; cheaper on SHmedia, cheaper still with FMOVD.  */
8332   if ((REGCLASS_HAS_FP_REG (dstclass)
8333        && REGCLASS_HAS_GENERAL_REG (srcclass))
8334       || (REGCLASS_HAS_GENERAL_REG (dstclass)
8335 	  && REGCLASS_HAS_FP_REG (srcclass)))
8336     return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8337 	    * ((GET_MODE_SIZE (mode) + 7) / 8U));
  /* General <-> FPUL moves (cost on an elided line).  */
8339   if ((dstclass == FPUL_REGS
8340        && REGCLASS_HAS_GENERAL_REG (srcclass))
8341       || (srcclass == FPUL_REGS
8342 	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
  /* FPUL <-> PR/MAC/T moves go through a general register (cost elided).  */
8345   if ((dstclass == FPUL_REGS
8346        && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8347       || (srcclass == FPUL_REGS
8348 	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
  /* Branch-target registers can only be reached via general regs.  */
8351   if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8352       || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
  /* Likewise FPSCR.  */
8355   if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8356       || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
  /* Condition partly elided; covers non-general to non-general moves,
     charged per 8-byte chunk.  */
8361 	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8362 	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8363     return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
  /* Default: per 4-byte chunk.  */
8365   return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8368 /* Like register_operand, but take into account that SHMEDIA can use
8369    the constant zero like a general register.  */
8371 sh_register_operand (op, mode)
8373      enum machine_mode mode;
  /* SHmedia treats constant zero as a register (it reads as r63);
     the accepting return is on an elided line.  */
8375   if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8377   return register_operand (op, mode);
/* Load the pointer-sized value at memory address ADDR into REG,
   returning the emitted move insn.  When Pmode is wider than ptr_mode
   (SH5 with 32-bit pointers in 64-bit registers), the load is
   sign-extended to Pmode.  */
8380 static rtx emit_load_ptr PARAMS ((rtx, rtx));
8383 emit_load_ptr (reg, addr)
8386   rtx mem = gen_rtx_MEM (ptr_mode, addr);
8388   if (Pmode != ptr_mode)
8389     mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8390   return emit_move_insn (reg, mem);
/* Output code for a vcall thunk (ASM_OUTPUT_MI_THUNK):  adjust the
   incoming "this" pointer by DELTA, optionally add the value found at
   *(*this + VCALL_OFFSET), then tail-call FUNCTION.  The thunk is
   emitted as RTL and run through a minimal rest_of_compilation so it
   can be scheduled and use delay slots.
   NOTE(review): numerous lines are elided in this view; the comments
   below annotate only the visible code.  */
8394 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8396      tree thunk_fndecl ATTRIBUTE_UNUSED;
8397      HOST_WIDE_INT delta;
8398      HOST_WIDE_INT vcall_offset;
8401   CUMULATIVE_ARGS cum;
8402   int structure_value_byref = 0;
8403   rtx this, this_value, sibcall, insns, funexp;
8404   tree funtype = TREE_TYPE (function);
  /* Nonzero when DELTA fits an add-immediate on this target.  */
8406     = (TARGET_SHMEDIA ? CONST_OK_FOR_J (delta) : CONST_OK_FOR_I (delta));
8408   rtx scratch0, scratch1, scratch2;
  /* Pretend reload has run so downstream passes accept hard regs.  */
8410   reload_completed = 1;
8412   current_function_uses_only_leaf_regs = 1;
8414   emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8416   /* Find the "this" pointer.  We have such a wide range of ABIs for the
8417      SH that it's best to do this completely machine independently.
8418      "this" is passed as first argument, unless a structure return pointer
8419      comes first, in which case "this" comes second.  */
8420   INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8421 #ifndef PCC_STATIC_STRUCT_RETURN
8422   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8423     structure_value_byref = 1;
8424 #endif /* not PCC_STATIC_STRUCT_RETURN */
  /* Skip past the invisible struct-return argument, if any.  */
8425   if (structure_value_byref && struct_value_rtx == 0)
8427       tree ptype = build_pointer_type (TREE_TYPE (funtype));
8429       FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8431   this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8433   /* For SHcompact, we only have r0 for a scratch register: r1 is the
8434      static chain pointer (even if you can't have nested virtual functions
8435      right now, someone might implement them sometime), and the rest of the
8436      registers are used for argument passing, are callee-saved, or reserved.  */
8437   scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  /* Non-SHcompact sub-target cases (conditions elided in this view).  */
8440       scratch1 = gen_rtx_REG (ptr_mode, 1);
8441       /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8442 	 pointing where to return struct values.  */
8443       scratch2 = gen_rtx_REG (Pmode, 3);
8445   else if (TARGET_SHMEDIA)
8447       scratch1 = gen_rtx_REG (ptr_mode, 21);
8448       scratch2 = gen_rtx_REG (Pmode, TR0_REG);
  /* Step 1: THIS += DELTA.  */
8451   this_value = plus_constant (this, delta);
  /* If a vcall adjustment follows, fold DELTA into that load's address
     when it is directly addressable (condition partly elided).  */
8453       && (simple_add || scratch0 != scratch1)
8454       && strict_memory_address_p (ptr_mode, this_value))
8456       emit_load_ptr (scratch0, this_value);
8462   else if (simple_add)
8463     emit_move_insn (this, this_value);
  /* DELTA too big for an immediate add: materialize it in a scratch.  */
8466       emit_move_insn (scratch1, GEN_INT (delta));
8467       emit_insn (gen_add2_insn (this, scratch1));
  /* Step 2: THIS += *(*THIS + VCALL_OFFSET), for covariant thunks.  */
8475 	emit_load_ptr (scratch0, this);
8477       offset_addr = plus_constant (scratch0, vcall_offset);
8478       if (strict_memory_address_p (ptr_mode, offset_addr))
8480       else if (! TARGET_SH5)
8482 	  /* scratch0 != scratch1, and we have indexed loads.  Get better
8483 	     schedule by loading the offset into r1 and using an indexed
8484 	     load - then the load of r1 can issue before the load from
8485 	     (this + delta) finishes.  */
8486 	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
8487 	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
8489       else if (TARGET_SHMEDIA
8490 	       ? CONST_OK_FOR_J (vcall_offset)
8491 	       : CONST_OK_FOR_I (vcall_offset))
8493 	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8494 	  offset_addr = scratch0;
8496       else if (scratch0 != scratch1)
8498 	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
8499 	  emit_insn (gen_add2_insn (scratch0, scratch1));
8500 	  offset_addr = scratch0;
      /* SHcompact with only r0: no way to form the address.  */
8503 	abort (); /* FIXME */
8504       emit_load_ptr (scratch0, offset_addr);
      /* Truncate the loaded 64-bit value back to pointer width.  */
8506      if (Pmode != ptr_mode)
8507 	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8508       emit_insn (gen_add2_insn (this, scratch0));
8511   /* Generate a tail call to the target function.  */
8512   if (! TREE_USED (function))
8514       assemble_external (function);
8515       TREE_USED (function) = 1;
8517   funexp = XEXP (DECL_RTL (function), 0);
  /* Call through a register so the sibcall pattern matches.  */
8518   emit_move_insn (scratch2, funexp);
8519   funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8520   sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8521   SIBLING_CALL_P (sibcall) = 1;
  /* Keep THIS live across the sibcall so it is passed through intact.  */
8522   use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8525   /* Run just enough of rest_of_compilation to do scheduling and get
8526      the insns emitted.  Note that use_thunk calls
8527      assemble_start_function and assemble_end_function.  */
8528   insns = get_insns ();
8530   if (optimize > 0 && flag_schedule_insns_after_reload)
8533       find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8534       life_analysis (insns, rtl_dump_file, PROP_FINAL);
8536       split_all_insns (1);
8538       schedule_insns (rtl_dump_file);
  /* Target-specific fixups (e.g. constant pools, delay slots).  */
8541   MACHINE_DEPENDENT_REORG (insns);
8543   if (optimize > 0 && flag_delayed_branch)
8544     dbr_schedule (insns, rtl_dump_file);
8545   shorten_branches (insns);
8546   final_start_function (insns, file, 1);
8547   final (insns, file, 1, 0);
8548   final_end_function ();
8550   if (optimize > 0 && flag_schedule_insns_after_reload)
8552       /* Release all memory allocated by flow.  */
8553       free_basic_block_vars (0);
8555       /* Release all memory held by regsets now.  */
8556       regset_release_memory ();
  /* Restore the pre-thunk compilation state.  */
8559   reload_completed = 0;