1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
/* Non-static variable holding the value of CODE_FOR_indirect_jump_scratch. */
51 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Offset (0 or 1) of the most and least significant word of a two-word
   value.  On a little-endian target the LSW is word 0 and the MSW is
   word 1; otherwise the order is reversed.  print_operand adds these to
   a register number for the 'S' and 'R' modifiers. */
53 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
54 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
56 /* These are some macros to abstract register modes.
   CONST_OK_FOR_ADD checks SIZE with CONST_OK_FOR_P when TARGET_SHMEDIA is
   set and with CONST_OK_FOR_I otherwise.  GEN_MOV, GEN_ADD3 and GEN_SUB3
   pick the gen_*di* generator when TARGET_SHMEDIA64 is set and the
   gen_*si* generator otherwise. */
57 #define CONST_OK_FOR_ADD(size) \
58 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
59 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
60 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
61 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
63 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
64 int current_function_interrupt;
66 /* ??? The pragma interrupt support will not work for SH3. */
67 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
68 output code for the next function appropriate for an interrupt handler. */
71 /* This is set by the trap_exit attribute for functions. It specifies
72 a trap number to be used in a trapa instruction at function exit
73 (instead of an rte instruction). */
76 /* This is used by the sp_switch attribute for functions. It specifies
77 a variable holding the address of the stack the interrupt function
78 should switch to/from at entry/exit. */
81 /* This is set by #pragma trapa, and is similar to the above, except that
82 the compiler doesn't emit code to preserve all registers. */
83 static int pragma_trapa;
85 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
86 which has a separate set of low regs for User and Supervisor modes.
87 This should only be used for the lowest level of interrupts. Higher levels
88 of interrupts must save the registers in case they themselves are interrupted. */
90 int pragma_nosave_low_regs;
92 /* This is used for communication between SETUP_INCOMING_VARARGS and
93 sh_expand_prologue. */
94 int current_function_anonymous_args;
96 /* Global variables for machine-dependent things. */
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
101 /* Saved operands from the last compare to use when we generate an scc or bcc insn. */
107 /* Provides the class number of the smallest class containing reg number. */
110 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
/* The table is indexed by hard register number.  The first 64 entries
   are R0_REGS followed by 63 GENERAL_REGS. */
112 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
/* The next 64 entries are FP0_REGS followed by 63 FP_REGS. */
128 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
/* Then eight TARGET_REGS entries and eight DF_REGS entries. */
144 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
/* The remaining entries use assorted individual classes. */
148 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
149 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
153 char sh_register_names[FIRST_PSEUDO_REGISTER] \
154 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
156 char sh_additional_register_names[ADDREGNAMES_SIZE] \
157 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
158 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
160 /* Provide reg_class from a letter such as appears in the machine
161 description. *: target independently reserved letter.
162 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.
   The table is indexed by LETTER - 'a' and lists the letters `a' to `z'
   in order.  An entry whose letter comment carries an extra `*' is one of
   the reserved letters and holds NO_REGS. */
164 enum reg_class reg_class_from_letter[] =
166 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
167 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
168 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
169 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
170 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
171 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
172 /* y */ FPUL_REGS, /* z */ R0_REGS
175 int assembler_dialect;
/* Forward declarations of this file's static helpers and of the sh_*
   functions that the target hook macros below install. */
177 static void split_branches PARAMS ((rtx));
178 static int branch_dest PARAMS ((rtx));
179 static void force_into PARAMS ((rtx, rtx));
180 static void print_slot PARAMS ((rtx));
181 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
182 static void dump_table PARAMS ((rtx));
183 static int hi_const PARAMS ((rtx));
184 static int broken_move PARAMS ((rtx));
185 static int mova_p PARAMS ((rtx));
186 static rtx find_barrier PARAMS ((int, rtx, rtx));
187 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
188 static rtx gen_block_redirect PARAMS ((rtx, int, int));
189 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
190 static rtx frame_insn PARAMS ((rtx));
191 static rtx push PARAMS ((int));
192 static void pop PARAMS ((int));
193 static void push_regs PARAMS ((HARD_REG_SET *, int));
194 static int calc_live_regs PARAMS ((HARD_REG_SET *));
195 static void mark_use PARAMS ((rtx, rtx *));
196 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
197 static rtx mark_constant_pool_use PARAMS ((rtx));
/* Not static and declared without a size; TARGET_ATTRIBUTE_TABLE below
   names it, so it is presumably defined later in the file. */
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
200 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
201 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
202 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
203 static void sh_insert_attributes PARAMS ((tree, tree *));
204 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
205 static int sh_use_dfa_interface PARAMS ((void));
206 static int sh_issue_rate PARAMS ((void));
207 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
209 static bool sh_cannot_modify_jumps_p PARAMS ((void));
210 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
212 static void sh_init_builtins PARAMS ((void));
213 static void sh_media_init_builtins PARAMS ((void));
214 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
215 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
216 HOST_WIDE_INT, tree));
217 static int flow_dependent_p PARAMS ((rtx, rtx));
218 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
219 static int shiftcosts PARAMS ((rtx));
220 static int andcosts PARAMS ((rtx));
221 static int addsubcosts PARAMS ((rtx));
222 static int multcosts PARAMS ((rtx));
223 static bool unspec_caller_rtx_p PARAMS ((rtx));
224 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
225 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
226 static int sh_address_cost PARAMS ((rtx));
228 /* Initialize the GCC target structure.
   Each hook macro is #undef'd and then redefined to the SH-specific
   function or string.  targetm, at the end of this group, is initialised
   from TARGET_INITIALIZER, which presumably expands using these macros. */
229 #undef TARGET_ATTRIBUTE_TABLE
230 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
232 /* The next two are used for debug info when compiling with -gdwarf. */
233 #undef TARGET_ASM_UNALIGNED_HI_OP
234 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
235 #undef TARGET_ASM_UNALIGNED_SI_OP
236 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
238 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
239 #undef TARGET_ASM_UNALIGNED_DI_OP
240 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
241 #undef TARGET_ASM_ALIGNED_DI_OP
242 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
244 #undef TARGET_ASM_FUNCTION_EPILOGUE
245 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
247 #undef TARGET_ASM_OUTPUT_MI_THUNK
248 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
250 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
251 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
253 #undef TARGET_INSERT_ATTRIBUTES
254 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
256 #undef TARGET_SCHED_ADJUST_COST
257 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
259 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
260 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
262 #undef TARGET_SCHED_ISSUE_RATE
263 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
265 #undef TARGET_CANNOT_MODIFY_JUMPS_P
266 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
268 #undef TARGET_MS_BITFIELD_LAYOUT_P
269 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
271 #undef TARGET_INIT_BUILTINS
272 #define TARGET_INIT_BUILTINS sh_init_builtins
273 #undef TARGET_EXPAND_BUILTIN
274 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
276 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
277 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
279 #undef TARGET_CANNOT_COPY_INSN_P
280 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
281 #undef TARGET_RTX_COSTS
282 #define TARGET_RTX_COSTS sh_rtx_costs
283 #undef TARGET_ADDRESS_COST
284 #define TARGET_ADDRESS_COST sh_address_cost
287 #undef TARGET_HAVE_TLS
288 #define TARGET_HAVE_TLS true
291 struct gcc_target targetm = TARGET_INITIALIZER;
293 /* Print the operand address in x to the stream.
   Depending on GET_CODE (x), the address is written as "@reg",
   "@(disp,reg)" for a base plus an integer displacement, "@(r0,reg)" for
   a base plus a register index, "@-reg" or "@reg+".  Otherwise x is
   passed through mark_constant_pool_use and printed with
   output_addr_const. */
296 print_operand_address (stream, x)
300 switch (GET_CODE (x))
304 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
309 rtx base = XEXP (x, 0);
310 rtx index = XEXP (x, 1);
312 switch (GET_CODE (index))
315 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
316 reg_names[true_regnum (base)]);
322 int base_num = true_regnum (base);
323 int index_num = true_regnum (index);
/* "r0" is printed literally; the other register is whichever of the two
   has the larger register number. */
325 fprintf (stream, "@(r0,%s)",
326 reg_names[MAX (base_num, index_num)]);
338 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
342 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
346 x = mark_constant_pool_use (x);
347 output_addr_const (stream, x);
352 /* Print operand x (an rtx) in assembler syntax to file stream
353 according to modifier code.
355 '.' print a .s (or /s, depending on ASSEMBLER_DIALECT) if insn needs delay slot
356 ',' print LOCAL_LABEL_PREFIX
357 '@' print trap, rte or rts depending upon pragma interruptness
358 '#' output a nop if there is nothing to put in the delay slot
359 ''' print likelihood suffix (/u for unlikely).
360 'O' print a constant without the #
361 'R' print the LSW of a dp value - changes if in little endian
362 'S' print the MSW of a dp value - changes if in little endian
363 'T' print the next word of a dp value - same as 'R' in big endian mode.
364 'M' print an `x' if `m' will print `base,index'.
365 'N' print 'r63' if the operand is (const_int 0).
366 'm' print a pair `base,offset' or `base,index', for LD and ST.
367 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
368 'o' output an operator. */
371 print_operand (stream, x, code)
380 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
381 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
382 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
385 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* Function return: "trapa" with the trap_exit number, "rte" when
   sh_cfun_interrupt_handler_p () holds, and "rts" otherwise. */
389 fprintf (stream, "trapa #%d", trap_exit);
390 else if (sh_cfun_interrupt_handler_p ())
391 fprintf (stream, "rte");
393 fprintf (stream, "rts");
396 /* Output a nop if there's nothing in the delay slot. */
397 if (dbr_sequence_length () == 0)
398 fprintf (stream, "\n\tnop");
402 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
/* Print "/u" when twice the probability recorded in the REG_BR_PROB note
   is below REG_BR_PROB_BASE. */
404 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
405 fputs ("/u", stream);
409 x = mark_constant_pool_use (x);
410 output_addr_const (stream, x);
413 fputs (reg_names[REGNO (x) + LSW], (stream));
416 fputs (reg_names[REGNO (x) + MSW], (stream));
419 /* Next word of a double. */
420 switch (GET_CODE (x))
423 fputs (reg_names[REGNO (x) + 1], (stream));
/* Unless the address is a PRE_DEC or POST_INC, step the MEM forward by
   4 bytes before printing its address. */
426 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
427 && GET_CODE (XEXP (x, 0)) != POST_INC)
428 x = adjust_address (x, SImode, 4);
429 print_operand_address (stream, XEXP (x, 0));
436 switch (GET_CODE (x))
438 case PLUS: fputs ("add", stream); break;
439 case MINUS: fputs ("sub", stream); break;
440 case MULT: fputs ("mul", stream); break;
441 case DIV: fputs ("div", stream); break;
442 case EQ: fputs ("eq", stream); break;
443 case NE: fputs ("ne", stream); break;
/* LT, LE, LTU and LEU print the same name as the mirrored comparison;
   presumably the insn pattern supplies the operands in swapped order. */
444 case GT: case LT: fputs ("gt", stream); break;
445 case GE: case LE: fputs ("ge", stream); break;
446 case GTU: case LTU: fputs ("gtu", stream); break;
447 case GEU: case LEU: fputs ("geu", stream); break;
453 if (GET_CODE (x) == MEM
454 && GET_CODE (XEXP (x, 0)) == PLUS
455 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
456 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
461 if (GET_CODE (x) != MEM)
464 switch (GET_CODE (x))
468 print_operand (stream, x, 0);
469 fputs (", 0", stream);
473 print_operand (stream, XEXP (x, 0), 0);
474 fputs (", ", stream);
475 print_operand (stream, XEXP (x, 1), 0);
484 if (x == CONST0_RTX (GET_MODE (x)))
486 fprintf ((stream), "r63");
491 if (GET_CODE (x) == CONST_INT)
493 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
500 switch (GET_CODE (x))
502 /* FIXME: We need this on SHmedia32 because reload generates
503 some sign-extended HI or QI loads into DImode registers
504 but, because Pmode is SImode, the address ends up with a
505 subreg:SI of the DImode register. Maybe reload should be
506 fixed so as to apply alter_subreg to such loads? */
508 if (SUBREG_BYTE (x) != 0
509 || GET_CODE (SUBREG_REG (x)) != REG)
/* Vector and wide floating-point registers are printed with a different
   prefix ("mtrx", "fv", "fp" or "d"); the "+ 2" or "+ 1" skips that many
   leading characters of the ordinary register name. */
516 if (FP_REGISTER_P (REGNO (x))
517 && GET_MODE (x) == V16SFmode)
518 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
519 else if (FP_REGISTER_P (REGNO (x))
520 && GET_MODE (x) == V4SFmode)
521 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
522 else if (GET_CODE (x) == REG
523 && GET_MODE (x) == V2SFmode)
524 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
525 else if (FP_REGISTER_P (REGNO (x))
526 && GET_MODE_SIZE (GET_MODE (x)) > 4)
527 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
529 fputs (reg_names[REGNO (x)], (stream));
533 output_address (XEXP (x, 0));
538 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
539 && GET_MODE (XEXP (x, 0)) == DImode
540 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
541 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
543 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
546 if (GET_CODE (val) == ASHIFTRT)
549 if (GET_CODE (XEXP (val, 0)) == CONST)
551 output_addr_const (stream, XEXP (val, 0));
552 if (GET_CODE (XEXP (val, 0)) == CONST)
554 fputs (" >> ", stream);
555 output_addr_const (stream, XEXP (val, 1));
560 if (GET_CODE (val) == CONST)
562 output_addr_const (stream, val);
563 if (GET_CODE (val) == CONST)
566 fputs (" & 65535)", stream);
574 output_addr_const (stream, x);
581 /* Like force_operand, but guarantees that VALUE ends up in TARGET.
   VALUE is first passed to force_operand with TARGET as the suggested
   destination.  If the result is not rtx_equal_p to TARGET, an explicit
   move from the result into TARGET is emitted. */
583 force_into (value, target)
586 value = force_operand (value, target);
587 if (! rtx_equal_p (value, target))
588 emit_insn (gen_move_insn (target, value));
591 /* Emit code to perform a block move. Choose the best method.
593 OPERANDS[0] is the destination.
594 OPERANDS[1] is the source.
595 OPERANDS[2] is the size.
596 OPERANDS[3] is the alignment safe to use.
   Each strategy visible here loads the address of a library routine into
   a Pmode register, forces the destination address into r4 and the
   source address into r5, and emits a block_move / block_lump pattern
   that takes the routine address.  Some strategies also load a count
   into r6. */
599 expand_block_move (operands)
602 int align = INTVAL (operands[3]);
603 int constp = (GET_CODE (operands[2]) == CONST_INT);
604 int bytes = (constp ? INTVAL (operands[2]) : 0);
606 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
607 alignment, or if it isn't a multiple of 4 bytes, then fail. */
608 if (! constp || align < 4 || (bytes % 4 != 0))
615 else if (bytes == 12)
620 rtx r4 = gen_rtx (REG, SImode, 4);
621 rtx r5 = gen_rtx (REG, SImode, 5);
623 entry_name = get_identifier ("__movstrSI12_i4");
625 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
626 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
627 force_into (XEXP (operands[0], 0), r4);
628 force_into (XEXP (operands[1], 0), r5);
629 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
632 else if (! TARGET_SMALLCODE)
638 rtx r4 = gen_rtx (REG, SImode, 4);
639 rtx r5 = gen_rtx (REG, SImode, 5);
640 rtx r6 = gen_rtx (REG, SImode, 6);
/* The library entry is chosen by bit 2 of the byte count, i.e. whether
   the number of 4-byte words is odd or even. */
642 entry_name = get_identifier (bytes & 4
644 : "__movstr_i4_even");
645 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
646 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
647 force_into (XEXP (operands[0], 0), r4);
648 force_into (XEXP (operands[1], 0), r5);
651 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
652 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
664 rtx r4 = gen_rtx_REG (SImode, 4);
665 rtx r5 = gen_rtx_REG (SImode, 5);
/* The routine name encodes the byte count: "__movstrSI<bytes>". */
667 sprintf (entry, "__movstrSI%d", bytes);
668 entry_name = get_identifier (entry);
669 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
670 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
671 force_into (XEXP (operands[0], 0), r4);
672 force_into (XEXP (operands[1], 0), r5);
673 emit_insn (gen_block_move_real (func_addr_rtx));
677 /* This is the same number of bytes as a memcpy call, but to a different
678 less common function name, so this will occasionally use more space. */
679 if (! TARGET_SMALLCODE)
684 int final_switch, while_loop;
685 rtx r4 = gen_rtx_REG (SImode, 4);
686 rtx r5 = gen_rtx_REG (SImode, 5);
687 rtx r6 = gen_rtx_REG (SImode, 6);
689 entry_name = get_identifier ("__movstr");
690 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
691 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
692 force_into (XEXP (operands[0], 0), r4);
693 force_into (XEXP (operands[1], 0), r5);
695 /* r6 controls the size of the move. 16 is decremented from it
696 for each 64 bytes moved. Then the negative bit left over is used
697 as an index into a list of move instructions. e.g., a 72 byte move
698 would be set up with size(r6) = 14, for one iteration through the
699 big while loop, and a switch of -2 for the last part. */
/* bytes / 4 is the word count.  final_switch covers the words left over
   modulo 16, and while_loop counts 16 for every full group of 16 words
   beyond the first. */
701 final_switch = 16 - ((bytes / 4) % 16);
702 while_loop = ((bytes / 4) / 16 - 1) * 16;
703 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
704 emit_insn (gen_block_lump_real (func_addr_rtx));
711 /* Prepare operands for a move define_expand; specifically, one of the
712 operands must be in a register.
   OPERANDS[0] is the destination and OPERANDS[1] the source; MODE is the
   mode of the move.  The visible code also legitimizes symbolic sources
   through legitimize_pic_address (TLS symbols in Pmode or ptr_mode are
   excluded from that step) and expands TLS symbol references according
   to their TLS model. */
715 prepare_move_operands (operands, mode)
717 enum machine_mode mode;
719 if ((mode == SImode || mode == DImode)
721 && ! ((mode == Pmode || mode == ptr_mode)
722 && tls_symbolic_operand (operands[1], Pmode) != 0))
725 if (SYMBOLIC_CONST_P (operands[1]))
727 if (GET_CODE (operands[0]) == MEM)
728 operands[1] = force_reg (Pmode, operands[1]);
729 else if (TARGET_SHMEDIA
730 && GET_CODE (operands[1]) == LABEL_REF
731 && target_reg_operand (operands[0], mode))
/* When no_new_pseudos is set, operands[0] is reused as the temporary
   instead of a fresh pseudo. */
735 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
736 operands[1] = legitimize_pic_address (operands[1], mode, temp);
/* (const (plus SYMBOL OFFSET)): legitimize the symbol, then add the
   offset back with expand_binop. */
739 else if (GET_CODE (operands[1]) == CONST
740 && GET_CODE (XEXP (operands[1], 0)) == PLUS
741 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
743 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
744 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
746 operands[1] = expand_binop (mode, add_optab, temp,
747 XEXP (XEXP (operands[1], 0), 1),
748 no_new_pseudos ? temp
749 : gen_reg_rtx (Pmode),
754 if (! reload_in_progress && ! reload_completed)
756 /* Copy the source to a register if both operands aren't registers. */
757 if (! register_operand (operands[0], mode)
758 && ! sh_register_operand (operands[1], mode))
759 operands[1] = copy_to_mode_reg (mode, operands[1]);
761 /* This case can happen while generating code to move the result
762 of a library call to the target. Reject `st r0,@(rX,rY)' because
763 reload will fail to find a spill register for rX, since r0 is already
764 being used for the source. */
765 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
766 && GET_CODE (operands[0]) == MEM
767 && GET_CODE (XEXP (operands[0], 0)) == PLUS
768 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
769 operands[1] = copy_to_mode_reg (mode, operands[1]);
/* TLS handling applies only to pointer-sized moves. */
772 if (mode == Pmode || mode == ptr_mode)
775 enum tls_model tls_kind;
779 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
781 rtx tga_op1, tga_ret, tmp, tmp2;
/* Global dynamic: emit the tls_global_dynamic pattern with r0 as its
   first operand. */
786 case TLS_MODEL_GLOBAL_DYNAMIC:
787 tga_ret = gen_rtx_REG (Pmode, R0_REG);
788 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
/* Local dynamic: emit tls_local_dynamic with r0, copy r0 into a pseudo,
   then emit symDTPOFF2reg with the symbol and that pseudo. */
792 case TLS_MODEL_LOCAL_DYNAMIC:
793 tga_ret = gen_rtx_REG (Pmode, R0_REG);
794 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
796 tmp = gen_reg_rtx (Pmode);
797 emit_move_insn (tmp, tga_ret);
799 if (register_operand (op0, Pmode))
802 tmp2 = gen_reg_rtx (Pmode);
804 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
/* Initial exec: emit GOTaddr2picreg, then tls_initial_exec on the
   sym2GOTTPOFF form of the symbol. */
808 case TLS_MODEL_INITIAL_EXEC:
810 emit_insn (gen_GOTaddr2picreg ());
811 tga_op1 = gen_reg_rtx (Pmode);
812 tmp = gen_sym2GOTTPOFF (op1);
813 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
/* Local exec: add the result of load_gbr to the result of symTPOFF2reg
   for the symbol. */
817 case TLS_MODEL_LOCAL_EXEC:
818 tmp2 = gen_reg_rtx (Pmode);
819 emit_insn (gen_load_gbr (tmp2));
820 tmp = gen_reg_rtx (Pmode);
821 emit_insn (gen_symTPOFF2reg (tmp, op1));
822 RTX_UNCHANGING_P (tmp) = 1;
824 if (register_operand (op0, Pmode))
827 op1 = gen_reg_rtx (Pmode);
829 emit_insn (gen_addsi3 (op1, tmp, tmp2));
842 /* Prepare the operands for an scc instruction; make sure that the
843 compare has been done.
   CODE is the comparison code.  The comparison is built from the saved
   operands sh_compare_op0 and sh_compare_op1 and stored into the T
   register (t_reg). */
845 prepare_scc_operands (code)
848 rtx t_reg = gen_rtx_REG (SImode, T_REG);
849 enum rtx_code oldcode = code;
850 enum machine_mode mode;
852 /* First need a compare insn. */
856 /* It isn't possible to handle this case. */
/* Exchange the two saved compare operands. */
875 rtx tmp = sh_compare_op0;
876 sh_compare_op0 = sh_compare_op1;
877 sh_compare_op1 = tmp;
/* Take the mode from op0, falling back to op1 when op0 has VOIDmode. */
880 mode = GET_MODE (sh_compare_op0);
881 if (mode == VOIDmode)
882 mode = GET_MODE (sh_compare_op1);
/* op0 always goes into a register.  op1 may stay as const0_rtx only for
   EQ and NE, or for signed comparisons against zero, and never for
   DImode or for SH2E floating-point compares. */
884 sh_compare_op0 = force_reg (mode, sh_compare_op0);
885 if ((code != EQ && code != NE
886 && (sh_compare_op1 != const0_rtx
887 || code == GTU || code == GEU || code == LTU || code == LEU))
888 || (mode == DImode && sh_compare_op1 != const0_rtx)
889 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
890 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 floating-point compares are emitted as a PARALLEL that also USEs
   the rtx returned by get_fpscr_rtx, via emit_sf_insn or emit_df_insn
   depending on the mode. */
892 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
893 (mode == SFmode ? emit_sf_insn : emit_df_insn)
894 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
895 gen_rtx (SET, VOIDmode, t_reg,
896 gen_rtx (code, SImode,
897 sh_compare_op0, sh_compare_op1)),
898 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
900 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
901 gen_rtx (code, SImode, sh_compare_op0,
907 /* Called from the md file, set up the operands of a compare instruction.
   CODE is the comparison to perform on the saved operands sh_compare_op0
   and sh_compare_op1; the result is set in the T register. */
910 from_compare (operands, code)
/* Take the mode from op0, falling back to op1 when op0 has VOIDmode. */
914 enum machine_mode mode = GET_MODE (sh_compare_op0);
916 if (mode == VOIDmode)
917 mode = GET_MODE (sh_compare_op1);
920 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
922 /* Force args into regs, since we can't use constants here. */
923 sh_compare_op0 = force_reg (mode, sh_compare_op0);
924 if (sh_compare_op1 != const0_rtx
925 || code == GTU || code == GEU
926 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
927 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* An SH2E floating-point GE is built from a recursive GT compare
   followed by the ieee_ccmpeqsf_t pattern. */
929 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
931 from_compare (operands, GT);
932 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
935 insn = gen_rtx_SET (VOIDmode,
936 gen_rtx_REG (SImode, T_REG),
937 gen_rtx (code, SImode, sh_compare_op0,
/* SH4 floating-point compares are wrapped in a PARALLEL that also USEs
   the rtx returned by get_fpscr_rtx. */
939 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
941 insn = gen_rtx (PARALLEL, VOIDmode,
943 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
944 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
950 /* Functions to output assembly code. */
952 /* Return a sequence of instructions to perform DI or DF move.
954 Since the SH cannot move a DI or DF in one instruction, we have
955 to take care when we see overlapping source and dest registers.
   OPERANDS[0] is the destination and OPERANDS[1] the source; MODE is the
   mode used for the register_operand checks.  INSN is unused.  In the
   templates, %T names the second word of an operand, and %S and %R its
   most and least significant words. */
958 output_movedouble (insn, operands, mode)
959 rtx insn ATTRIBUTE_UNUSED;
961 enum machine_mode mode;
963 rtx dst = operands[0];
964 rtx src = operands[1];
/* For a pre-decrement destination, store the second word first. */
966 if (GET_CODE (dst) == MEM
967 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
968 return "mov.l %T1,%0\n\tmov.l %1,%0";
970 if (register_operand (dst, mode)
971 && register_operand (src, mode))
973 if (REGNO (src) == MACH_REG)
974 return "sts mach,%S0\n\tsts macl,%R0";
976 /* When mov.d r1,r2 do r2->r3 then r1->r2;
977 when mov.d r1,r0 do r1->r0 then r2->r1. */
979 if (REGNO (src) + 1 == REGNO (dst))
980 return "mov %T1,%T0\n\tmov %1,%0";
982 return "mov %1,%0\n\tmov %T1,%T0";
/* Integer constant source: the most significant word is set to -1 for a
   negative value and to 0 otherwise. */
984 else if (GET_CODE (src) == CONST_INT)
986 if (INTVAL (src) < 0)
987 output_asm_insn ("mov #-1,%S0", operands);
989 output_asm_insn ("mov #0,%S0", operands);
/* Memory source: find the register used in the address (ptrreg). */
993 else if (GET_CODE (src) == MEM)
996 int dreg = REGNO (dst);
997 rtx inside = XEXP (src, 0);
999 if (GET_CODE (inside) == REG)
1000 ptrreg = REGNO (inside);
1001 else if (GET_CODE (inside) == SUBREG)
1002 ptrreg = subreg_regno (inside);
1003 else if (GET_CODE (inside) == PLUS)
1005 ptrreg = REGNO (XEXP (inside, 0));
1006 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1007 an offsettable address. Unfortunately, offsettable addresses use
1008 QImode to check the offset, and a QImode offsettable address
1009 requires r0 for the other operand, which is not currently
1010 supported, so we can't use the 'o' constraint.
1011 Thus we must check for and handle r0+REG addresses here.
1012 We punt for now, since this is likely very rare. */
1013 if (GET_CODE (XEXP (inside, 1)) == REG)
1016 else if (GET_CODE (inside) == LABEL_REF)
1017 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1018 else if (GET_CODE (inside) == POST_INC)
1019 return "mov.l %1,%0\n\tmov.l %1,%T0";
1023 /* Work out the safe way to copy. Copy into the second half first. */
1025 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1028 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1031 /* Print an instruction which would have gone into a delay slot after
1032 another instruction, but couldn't because the other instruction expanded
1033 into a sequence where putting the slot insn at the end wouldn't work.
   INSN is the sequence; its element 1 is output with final_scan_insn and
   then marked deleted, presumably so that it is not output a second time. */
1039 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
1041 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output a far jump for INSN; OP is presumably the destination label.
   The jump template is picked among several mov.w / mov.l / mova
   sequences ending in braf or jmp.  The target value is emitted as data
   (.long or .word) under the freshly generated label this.lab, after an
   ".align 2".  If the previous non-note insn is an indirect_jump_scratch,
   the register it sets is used as the scratch register.  Otherwise r13 is
   saved (in MACL or on the stack), used as the scratch, and restored
   afterwards. */
1045 output_far_jump (insn, op)
1049 struct { rtx lab, reg, op; } this;
1050 rtx braf_base_lab = NULL_RTX;
1053 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1056 this.lab = gen_label_rtx ();
1060 && offset - get_attr_length (insn) <= 32766)
1063 jump = "mov.w %O0,%1; braf %1";
1071 jump = "mov.l %O0,%1; braf %1";
1073 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1076 jump = "mov.l %O0,%1; jmp @%1";
1078 /* If we have a scratch register available, use it. */
1079 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1080 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1082 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1083 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1084 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1085 output_asm_insn (jump, &this.lab);
1086 if (dbr_sequence_length ())
1087 print_slot (final_sequence);
1089 output_asm_insn ("nop", 0);
1093 /* Output the delay slot insn first if any. */
1094 if (dbr_sequence_length ())
1095 print_slot (final_sequence);
1097 this.reg = gen_rtx_REG (SImode, 13);
1098 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1099 Fortunately, MACL is fixed and call-clobbered, and we never
1100 need its value across jumps, so save r13 in it instead of in
1103 output_asm_insn ("lds r13, macl", 0);
1105 output_asm_insn ("mov.l r13,@-r15", 0);
1106 output_asm_insn (jump, &this.lab);
1108 output_asm_insn ("sts macl, r13", 0);
1110 output_asm_insn ("mov.l @r15+,r13", 0);
1112 if (far && flag_pic && TARGET_SH2)
1114 braf_base_lab = gen_label_rtx ();
1115 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1116 CODE_LABEL_NUMBER (braf_base_lab));
1119 output_asm_insn (".align 2", 0);
1120 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1122 if (far && flag_pic)
1125 this.lab = braf_base_lab;
1126 output_asm_insn (".long %O2-%O0", &this.lab);
1129 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1133 /* Local label counter, used for constants in the pool and inside
1134 pattern branches. */
1136 static int lf = 100;
1138 /* Output code for ordinary branches.
   The sequence is chosen by the length attribute of INSN.  LOGIC selects
   the branch sense: the short form returns a "bt" template when LOGIC is
   nonzero and "bf" otherwise.  The long forms instead emit the opposite
   conditional branch to a local "LF" label, jumping over an unconditional
   "bra" to the real target in OPERANDS[0]. */
1141 output_branch (logic, insn, operands)
1146 switch (get_attr_length (insn))
1149 /* This can happen if filling the delay slot has caused a forward
1150 branch to exceed its range (we could reverse it, but only
1151 when we know we won't overextend other branches; this should
1152 best be handled by relaxation).
1153 It can also happen when other condbranches hoist delay slot insn
1154 from their destination, thus leading to code size increase.
1155 But the branch will still be in the range -4092..+4098 bytes. */
1160 /* The call to print_slot will clobber the operands. */
1161 rtx op0 = operands[0];
1163 /* If the instruction in the delay slot is annulled (true), then
1164 there is no delay slot where we can put it now. The only safe
1165 place for it is after the label. final will do that by default. */
1168 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1170 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1171 ASSEMBLER_DIALECT ? "/" : ".", label);
1172 print_slot (final_sequence);
1175 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1177 output_asm_insn ("bra\t%l0", &op0);
1178 fprintf (asm_out_file, "\tnop\n");
1179 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1183 /* When relaxing, handle this like a short branch. The linker
1184 will fix it up if it still doesn't fit after relaxation. */
1186 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1188 /* These are for SH2e, in which we have to account for the
1189 extra nop because of the hardware bug in annulled branches. */
1196 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1198 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1200 ASSEMBLER_DIALECT ? "/" : ".", label);
1201 fprintf (asm_out_file, "\tnop\n");
1202 output_asm_insn ("bra\t%l0", operands);
1203 fprintf (asm_out_file, "\tnop\n");
1204 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1208 /* When relaxing, fall through. */
1213 sprintf (buffer, "b%s%ss\t%%l0",
1215 ASSEMBLER_DIALECT ? "/" : ".");
1216 output_asm_insn (buffer, &operands[0]);
1221 /* There should be no longer branches now - that would
1222 indicate that something has destroyed the branches set
1223 up in machine_dependent_reorg. */
1229 output_branchy_insn (code, template, insn, operands)
1231 const char *template;
1235 rtx next_insn = NEXT_INSN (insn);
1237 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1239 rtx src = SET_SRC (PATTERN (next_insn));
1240 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1242 /* Following branch not taken */
1243 operands[9] = gen_label_rtx ();
1244 emit_label_after (operands[9], next_insn);
1245 INSN_ADDRESSES_NEW (operands[9],
1246 INSN_ADDRESSES (INSN_UID (next_insn))
1247 + get_attr_length (next_insn));
1252 int offset = (branch_dest (next_insn)
1253 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1254 if (offset >= -252 && offset <= 258)
1256 if (GET_CODE (src) == IF_THEN_ELSE)
1258 src = XEXP (src, 1);
1264 operands[9] = gen_label_rtx ();
1265 emit_label_after (operands[9], insn);
1266 INSN_ADDRESSES_NEW (operands[9],
1267 INSN_ADDRESSES (INSN_UID (insn))
1268 + get_attr_length (insn));
/* Output INSN by delegating to output_branchy_insn, passing NE as the
comparison code and a template that emits "bt %l9" followed by
"fcmp/eq %1,%0". The result of output_branchy_insn is returned
unchanged. */
1273 output_ieee_ccmpeq (insn, operands)
1274 rtx insn, *operands;
1276 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1279 /* Output to FILE the start of the assembler file. */
1282 output_file_start (file)
1285 output_file_directive (file, main_input_filename);
1287 /* Switch to the data section so that the coffsem symbol
1288 isn't in the text section. */
1291 if (TARGET_LITTLE_ENDIAN)
1292 fprintf (file, "\t.little\n");
1294 if (TARGET_SHCOMPACT)
1295 fprintf (file, "\t.mode\tSHcompact\n");
1296 else if (TARGET_SHMEDIA)
1297 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1298 TARGET_SHMEDIA64 ? 64 : 32);
1301 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1304 unspec_caller_rtx_p (pat)
1307 switch (GET_CODE (pat))
1310 return unspec_caller_rtx_p (XEXP (pat, 0));
1313 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1315 return unspec_caller_rtx_p (XEXP (pat, 1));
1317 if (XINT (pat, 1) == UNSPEC_CALLER)
1326 /* Indicate that INSN cannot be duplicated. This is true for an insn
1327 that generates a unique label. */
1330 sh_cannot_copy_insn_p (insn)
1335 if (!reload_completed || !flag_pic)
1338 if (GET_CODE (insn) != INSN)
1340 if (asm_noperands (insn) >= 0)
1343 pat = PATTERN (insn);
1344 if (GET_CODE (pat) != SET)
1346 pat = SET_SRC (pat);
1348 if (unspec_caller_rtx_p (pat))
1354 /* Actual number of instructions used to make an arithmetic right shift
by N. The table has 32 entries and is indexed by the shift count N (0..31).
NOTE(review): the value 8 appears to mark counts that have no short inline
sequence; shiftcosts and expand_ashiftrt compare entries against
1 + SH_DYNAMIC_SHIFT_COST -- confirm. */
1355 static const char ashiftrt_insns[] =
1356 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1358 /* Number of instructions needed to shift by N, indexed by the shift
count N (0..31). Left shift and logical right shift are the same.
gen_shifty_op uses shift_insns[N] as the number of entries of
shift_amounts[N] to emit. */
1359 static const char shift_insns[] =
1360 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1362 /* Individual shift amounts needed to get the above length sequences.
1363 One bit right shifts clobber the T bit, so when possible, put one bit
1364 shifts in the middle of the sequence, so the ends are eligible for
1365 branch delay slots.
Row N holds the component shifts for a total shift by N; the entries of
each row sum to N, and shift_insns[N] entries of the row are used.
NOTE(review): negative entries presumably denote a shift in the opposite
direction (gen_ashift mentions negative values coming from this array) --
confirm. */
1366 static const short shift_amounts[32][5] = {
1367 {0}, {1}, {2}, {2, 1},
1368 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1369 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1370 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1371 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1372 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1373 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1374 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1376 /* Likewise, but for shift amounts < 16, up to three highmost bits
1377 might be clobbered. This is typically used when combined with some
1378 kind of sign or zero extension.
The table is indexed by the shift count N (0..31); gen_shifty_hi_op uses
ext_shift_insns[N] as the number of entries of ext_shift_amounts[N] to
emit. */
1380 static const char ext_shift_insns[] =
1381 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Component shift amounts for the sequences counted by ext_shift_insns.
Row N is used for a total shift by N, and the entries of each row sum
to N. Rows 6, 7 and 13 to 15 differ from shift_amounts; the other rows
are the same.
NOTE(review): negative entries presumably denote a shift in the opposite
direction -- confirm against gen_ashift. */
1383 static const short ext_shift_amounts[32][4] = {
1384 {0}, {1}, {2}, {2, 1},
1385 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1386 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1387 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1388 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1389 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1390 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1391 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1393 /* Assuming we have a value that has been sign-extended by at least one bit,
1394 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1395 to shift it by N without data loss, and quicker than by other means?
The test below is true exactly for N == 7 and N == 15. */
1396 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1398 /* This is used in length attributes in sh.md to help compute the length
1399 of arbitrary constant shift instructions. */
1402 shift_insns_rtx (insn)
1405 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1406 int shift_count = INTVAL (XEXP (set_src, 1));
1407 enum rtx_code shift_code = GET_CODE (set_src);
1412 return ashiftrt_insns[shift_count];
1415 return shift_insns[shift_count];
1421 /* Return the cost of a shift. */
1432 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1434 if (GET_MODE (x) == DImode
1435 && GET_CODE (XEXP (x, 1)) == CONST_INT
1436 && INTVAL (XEXP (x, 1)) == 1)
1439 /* Everything else is invalid, because there is no pattern for it. */
1442 /* If shift by a non constant, then this will be expensive. */
1443 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1444 return SH_DYNAMIC_SHIFT_COST;
1446 value = INTVAL (XEXP (x, 1));
1448 /* Otherwise, return the true cost in instructions. */
1449 if (GET_CODE (x) == ASHIFTRT)
1451 int cost = ashiftrt_insns[value];
1452 /* If SH3, then we put the constant in a reg and use shad. */
1453 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1454 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1458 return shift_insns[value];
1461 /* Return the cost of an AND operation. */
1469 /* Anding with a register is a single cycle and instruction. */
1470 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1473 i = INTVAL (XEXP (x, 1));
1477 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1478 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1479 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1485 /* These constants are single cycle extu.[bw] instructions. */
1486 if (i == 0xff || i == 0xffff)
1488 /* A constant that can be used in an and-immediate instruction takes a single
1489 cycle, but this requires r0, so make it a little more expensive. */
1490 if (CONST_OK_FOR_L (i))
1492 /* Constants that can be loaded with a mov immediate and an and.
1493 This case is probably unnecessary. */
1494 if (CONST_OK_FOR_I (i))
1496 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1497 This case is probably unnecessary. */
1501 /* Return the cost of an addition or a subtraction. */
1507 /* Adding a register is a single cycle insn. */
1508 if (GET_CODE (XEXP (x, 1)) == REG
1509 || GET_CODE (XEXP (x, 1)) == SUBREG)
1512 /* Likewise for small constants. */
1513 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1514 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1518 switch (GET_CODE (XEXP (x, 1)))
1523 return TARGET_SHMEDIA64 ? 5 : 3;
1526 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1528 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
1530 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1538 /* Any other constant requires a 2 cycle pc-relative load plus an
addition. */
1543 /* Return the cost of a multiply. */
1546 rtx x ATTRIBUTE_UNUSED;
1553 /* We have a mul insn, so we can never take more than the mul and the
1554 read of the mac reg, but count more because of the latency and extra
1556 if (TARGET_SMALLCODE)
1561 /* If we're aiming at small code, then just count the number of
1562 insns in a multiply call sequence. */
1563 if (TARGET_SMALLCODE)
1566 /* Otherwise count all the insns in the routine we'd be calling too. */
1570 /* Compute a (partial) cost for rtx X. Return true if the complete
1571 cost has been computed, and false if subexpressions should be
1572 scanned. In either case, *TOTAL contains the cost result. */
1575 sh_rtx_costs (x, code, outer_code, total)
1577 int code, outer_code, *total;
1584 if (INTVAL (x) == 0)
1586 else if (outer_code == AND && and_operand ((x), DImode))
1588 else if ((outer_code == IOR || outer_code == XOR
1589 || outer_code == PLUS)
1590 && CONST_OK_FOR_P (INTVAL (x)))
1592 else if (CONST_OK_FOR_J (INTVAL (x)))
1593 *total = COSTS_N_INSNS (outer_code != SET);
1594 else if (CONST_OK_FOR_J (INTVAL (x) >> 16))
1595 *total = COSTS_N_INSNS (2);
1596 else if (CONST_OK_FOR_J ((INTVAL (x) >> 16) >> 16))
1597 *total = COSTS_N_INSNS (3);
1599 *total = COSTS_N_INSNS (4);
1602 if (CONST_OK_FOR_I (INTVAL (x)))
1604 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1605 && CONST_OK_FOR_L (INTVAL (x)))
1614 if (TARGET_SHMEDIA64)
1615 *total = COSTS_N_INSNS (4);
1616 else if (TARGET_SHMEDIA32)
1617 *total = COSTS_N_INSNS (2);
1624 *total = COSTS_N_INSNS (4);
1630 *total = COSTS_N_INSNS (addsubcosts (x));
1634 *total = COSTS_N_INSNS (andcosts (x));
1638 *total = COSTS_N_INSNS (multcosts (x));
1644 *total = COSTS_N_INSNS (shiftcosts (x));
1651 *total = COSTS_N_INSNS (20);
1664 /* Compute the cost of an address. For the SH, all valid addresses are
1665 the same cost. Use a slightly higher cost for reg + reg addressing,
1666 since it increases pressure on r0. */
1672 return (GET_CODE (X) == PLUS
1673 && ! CONSTANT_P (XEXP (X, 1))
1674 && ! TARGET_SHMEDIA ? 1 : 0);
1677 /* Code to expand a shift. */
1680 gen_ashift (type, n, reg)
1685 /* Negative values here come from the shift_amounts array. */
1698 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1702 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1704 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1707 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1712 /* Same for HImode */
1715 gen_ashift_hi (type, n, reg)
1720 /* Negative values here come from the shift_amounts array. */
1734 /* We don't have HImode right shift operations because using the
1735 ordinary 32 bit shift instructions for that doesn't generate proper
1736 zero/sign extension.
1737 gen_ashift_hi is only called in contexts where we know that the
1738 sign extension works out correctly. */
1741 if (GET_CODE (reg) == SUBREG)
1743 offset = SUBREG_BYTE (reg);
1744 reg = SUBREG_REG (reg);
1746 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1750 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1755 /* Output RTL to split a constant shift into its component SH constant
1756 shift instructions. */
1759 gen_shifty_op (code, operands)
1763 int value = INTVAL (operands[2]);
1766 /* Truncate the shift count in case it is out of bounds. */
1767 value = value & 0x1f;
1771 if (code == LSHIFTRT)
1773 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1774 emit_insn (gen_movt (operands[0]));
1777 else if (code == ASHIFT)
1779 /* There is a two instruction sequence for 31 bit left shifts,
1780 but it requires r0. */
1781 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1783 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1784 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1789 else if (value == 0)
1791 /* This can happen when not optimizing. We must output something here
1792 to prevent the compiler from aborting in final.c after the try_split
call. */
1794 emit_insn (gen_nop ());
1798 max = shift_insns[value];
1799 for (i = 0; i < max; i++)
1800 gen_ashift (code, shift_amounts[value][i], operands[0]);
1803 /* Same as above, but optimized for values where the topmost bits don't
matter. */
1807 gen_shifty_hi_op (code, operands)
1811 int value = INTVAL (operands[2]);
1813 void (*gen_fun) PARAMS ((int, int, rtx));
1815 /* This operation is used by and_shl for SImode values with a few
1816 high bits known to be cleared. */
1820 emit_insn (gen_nop ());
1824 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1827 max = ext_shift_insns[value];
1828 for (i = 0; i < max; i++)
1829 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1832 /* When shifting right, emit the shifts in reverse order, so that
1833 solitary negative values come first. */
1834 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1835 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1838 /* Output RTL for an arithmetic right shift. */
1840 /* ??? Rewrite to use super-optimizer sequences. */
1843 expand_ashiftrt (operands)
1854 if (GET_CODE (operands[2]) != CONST_INT)
1856 rtx count = copy_to_mode_reg (SImode, operands[2]);
1857 emit_insn (gen_negsi2 (count, count));
1858 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1861 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1862 > 1 + SH_DYNAMIC_SHIFT_COST)
1865 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1866 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1870 if (GET_CODE (operands[2]) != CONST_INT)
1873 value = INTVAL (operands[2]) & 31;
1877 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1880 else if (value >= 16 && value <= 19)
1882 wrk = gen_reg_rtx (SImode);
1883 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1886 gen_ashift (ASHIFTRT, 1, wrk);
1887 emit_move_insn (operands[0], wrk);
1890 /* Expand a short sequence inline, longer call a magic routine. */
1891 else if (value <= 5)
1893 wrk = gen_reg_rtx (SImode);
1894 emit_move_insn (wrk, operands[1]);
1896 gen_ashift (ASHIFTRT, 1, wrk);
1897 emit_move_insn (operands[0], wrk);
1901 wrk = gen_reg_rtx (Pmode);
1903 /* Load the value into an arg reg and call a helper. */
1904 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1905 sprintf (func, "__ashiftrt_r4_%d", value);
1906 func_name = get_identifier (func);
1907 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1908 emit_move_insn (wrk, sym);
1909 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1910 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* Return nonzero if, according to shift_insns, a constant shift by COUNT
takes more than 1 + SH_DYNAMIC_SHIFT_COST instructions.
NOTE(review): INTVAL (count) is used unmasked as an index into the
32-entry shift_insns table; callers presumably guarantee a count in
0..31 -- confirm. */
1915 sh_dynamicalize_shift_p (count)
1918 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1921 /* Try to find a good way to implement the combiner pattern
1922 [(set (match_operand:SI 0 "register_operand" "r")
1923 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1924 (match_operand:SI 2 "const_int_operand" "n"))
1925 (match_operand:SI 3 "const_int_operand" "n"))) .
1926 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1927 return 0 for simple right / left or left/right shift combination.
1928 return 1 for a combination of shifts with zero_extend.
1929 return 2 for a combination of shifts with an AND that needs r0.
1930 return 3 for a combination of shifts with an AND that needs an extra
1931 scratch register, when the three highmost bits of the AND mask are clear.
1932 return 4 for a combination of shifts with an AND that needs an extra
1933 scratch register, when any of the three highmost bits of the AND mask
is set.
1935 If ATTRP is set, store an initial right shift width in ATTRP[0],
1936 and the instruction length in ATTRP[1] . These values are not valid
when returning 0.
1938 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1939 shift_amounts for the last shift value that is to be used before the
sign extend. */
1942 shl_and_kind (left_rtx, mask_rtx, attrp)
1943 rtx left_rtx, mask_rtx;
1946 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1947 int left = INTVAL (left_rtx), right;
1949 int cost, best_cost = 10000;
1950 int best_right = 0, best_len = 0;
1954 if (left < 0 || left > 31)
1956 if (GET_CODE (mask_rtx) == CONST_INT)
1957 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1959 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1960 /* Can this be expressed as a right shift / left shift pair ? */
1961 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1962 right = exact_log2 (lsb);
1963 mask2 = ~(mask + lsb - 1);
1964 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1965 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1967 best_cost = shift_insns[right] + shift_insns[right + left];
1968 /* mask has no trailing zeroes <==> ! right */
1969 else if (! right && mask2 == ~(lsb2 - 1))
1971 int late_right = exact_log2 (lsb2);
1972 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1974 /* Try to use zero extend */
1975 if (mask2 == ~(lsb2 - 1))
1979 for (width = 8; width <= 16; width += 8)
1981 /* Can we zero-extend right away? */
1982 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1985 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1986 if (cost < best_cost)
1997 /* ??? Could try to put zero extend into initial right shift,
1998 or even shift a bit left before the right shift. */
1999 /* Determine value of first part of left shift, to get to the
2000 zero extend cut-off point. */
2001 first = width - exact_log2 (lsb2) + right;
2002 if (first >= 0 && right + left - first >= 0)
2004 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2005 + ext_shift_insns[right + left - first];
2006 if (cost < best_cost)
2018 /* Try to use r0 AND pattern */
2019 for (i = 0; i <= 2; i++)
2023 if (! CONST_OK_FOR_L (mask >> i))
2025 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2026 if (cost < best_cost)
2031 best_len = cost - 1;
2034 /* Try to use a scratch register to hold the AND operand. */
2035 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2036 for (i = 0; i <= 2; i++)
2040 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
2041 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2042 if (cost < best_cost)
2047 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
2053 attrp[0] = best_right;
2054 attrp[1] = best_len;
2059 /* This is used in length attributes of the unnamed instructions
2060 corresponding to shl_and_kind return values of 1 and 2.
It returns the instruction length that shl_and_kind stores in its second
attribute slot for the shift count and mask found in INSN. */
2062 shl_and_length (insn)
2065 rtx set_src, left_rtx, mask_rtx;
/* Take the source of the first element of INSN's pattern vector; its
first operand carries the shift count and its second operand is the
mask. */
2068 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2069 left_rtx = XEXP (XEXP (set_src, 0), 1);
2070 mask_rtx = XEXP (set_src, 1);
/* Only the attribute output is wanted; the returned kind is ignored. */
2071 shl_and_kind (left_rtx, mask_rtx, attributes);
2072 return attributes[1];
2075 /* This is used in length attribute of the and_shl_scratch instruction.
The result is the sum of the shift_insns entries for three constant shift
counts found at successive nesting levels of the SET source, plus one. */
2078 shl_and_scr_length (insn)
2081 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
/* Outermost shift count. */
2082 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2083 rtx op = XEXP (set_src, 0);
/* Next shift count, plus one further instruction.
NOTE(review): the extra one is presumably the AND itself -- confirm. */
2084 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
/* Step down two operand levels to reach the innermost shift. */
2085 op = XEXP (XEXP (op, 0), 0);
2086 return len + shift_insns[INTVAL (XEXP (op, 1))];
2089 /* Generating rtl? */
2090 extern int rtx_equal_function_value_matters;
2092 /* Generate rtl for instructions for which shl_and_kind advised a particular
2093 method of generating them, i.e. returned zero. */
2096 gen_shl_and (dest, left_rtx, mask_rtx, source)
2097 rtx dest, left_rtx, mask_rtx, source;
2100 unsigned HOST_WIDE_INT mask;
2101 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2102 int right, total_shift;
2103 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2105 right = attributes[0];
2106 total_shift = INTVAL (left_rtx) + right;
2107 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2114 int first = attributes[2];
2119 emit_insn ((mask << right) <= 0xff
2120 ? gen_zero_extendqisi2(dest,
2121 gen_lowpart (QImode, source))
2122 : gen_zero_extendhisi2(dest,
2123 gen_lowpart (HImode, source)));
2127 emit_insn (gen_movsi (dest, source));
2131 operands[2] = GEN_INT (right);
2132 gen_shifty_hi_op (LSHIFTRT, operands);
2136 operands[2] = GEN_INT (first);
2137 gen_shifty_hi_op (ASHIFT, operands);
2138 total_shift -= first;
2142 emit_insn (mask <= 0xff
2143 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2144 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2145 if (total_shift > 0)
2147 operands[2] = GEN_INT (total_shift);
2148 gen_shifty_hi_op (ASHIFT, operands);
2153 shift_gen_fun = gen_shifty_op;
2155 /* If the topmost bit that matters is set, set the topmost bits
2156 that don't matter. This way, we might be able to get a shorter
2158 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2159 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
2161 /* Don't expand fine-grained when combining, because that will
2162 make the pattern fail. */
2163 if (rtx_equal_function_value_matters
2164 || reload_in_progress || reload_completed)
2168 /* Cases 3 and 4 should be handled by this split
2169 only while combining */
2174 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2177 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2182 operands[2] = GEN_INT (total_shift);
2183 shift_gen_fun (ASHIFT, operands);
2190 if (kind != 4 && total_shift < 16)
2192 neg = -ext_shift_amounts[total_shift][1];
2194 neg -= ext_shift_amounts[total_shift][2];
2198 emit_insn (gen_and_shl_scratch (dest, source,
2201 GEN_INT (total_shift + neg),
2203 emit_insn (gen_movsi (dest, dest));
2210 /* Try to find a good way to implement the combiner pattern
2211 [(set (match_operand:SI 0 "register_operand" "=r")
2212 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2213 (match_operand:SI 2 "const_int_operand" "n")
2214 (match_operand:SI 3 "const_int_operand" "n")
2216 (clobber (reg:SI T_REG))]
2217 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2218 return 0 for simple left / right shift combination.
2219 return 1 for left shift / 8 bit sign extend / left shift.
2220 return 2 for left shift / 16 bit sign extend / left shift.
2221 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2222 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2223 return 5 for left shift / 16 bit sign extend / right shift
2224 return 6 for < 8 bit sign extend / left shift.
2225 return 7 for < 8 bit sign extend / left shift / single right shift.
2226 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2229 shl_sext_kind (left_rtx, size_rtx, costp)
2230 rtx left_rtx, size_rtx;
2233 int left, size, insize, ext;
2234 int cost = 0, best_cost;
2237 left = INTVAL (left_rtx);
2238 size = INTVAL (size_rtx);
2239 insize = size - left;
2242 /* Default to left / right shift. */
2244 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2247 /* 16 bit shift / sign extend / 16 bit shift */
2248 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2249 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2250 below, by alternative 3 or something even better. */
2251 if (cost < best_cost)
2257 /* Try a plain sign extend between two shifts. */
2258 for (ext = 16; ext >= insize; ext -= 8)
2262 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2263 if (cost < best_cost)
2265 kind = ext / (unsigned) 8;
2269 /* Check if we can do a sloppy shift with a final signed shift
2270 restoring the sign. */
2271 if (EXT_SHIFT_SIGNED (size - ext))
2272 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2273 /* If not, maybe it's still cheaper to do the second shift sloppy,
2274 and do a final sign extend? */
2275 else if (size <= 16)
2276 cost = ext_shift_insns[ext - insize] + 1
2277 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2280 if (cost < best_cost)
2282 kind = ext / (unsigned) 8 + 2;
2286 /* Check if we can sign extend in r0 */
2289 cost = 3 + shift_insns[left];
2290 if (cost < best_cost)
2295 /* Try the same with a final signed shift. */
2298 cost = 3 + ext_shift_insns[left + 1] + 1;
2299 if (cost < best_cost)
2308 /* Try to use a dynamic shift. */
2309 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2310 if (cost < best_cost)
2321 /* Function to be used in the length attribute of the instructions
2322 implementing this pattern. */
2325 shl_sext_length (insn)
2328 rtx set_src, left_rtx, size_rtx;
2331 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2332 left_rtx = XEXP (XEXP (set_src, 0), 1);
2333 size_rtx = XEXP (set_src, 1);
2334 shl_sext_kind (left_rtx, size_rtx, &cost);
2338 /* Generate rtl for this pattern */
2341 gen_shl_sext (dest, left_rtx, size_rtx, source)
2342 rtx dest, left_rtx, size_rtx, source;
2345 int left, size, insize, cost;
2348 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2349 left = INTVAL (left_rtx);
2350 size = INTVAL (size_rtx);
2351 insize = size - left;
2359 int ext = kind & 1 ? 8 : 16;
2360 int shift2 = size - ext;
2362 /* Don't expand fine-grained when combining, because that will
2363 make the pattern fail. */
2364 if (! rtx_equal_function_value_matters
2365 && ! reload_in_progress && ! reload_completed)
2367 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2368 emit_insn (gen_movsi (dest, source));
2372 emit_insn (gen_movsi (dest, source));
2376 operands[2] = GEN_INT (ext - insize);
2377 gen_shifty_hi_op (ASHIFT, operands);
2380 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2381 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2386 operands[2] = GEN_INT (shift2);
2387 gen_shifty_op (ASHIFT, operands);
2394 if (EXT_SHIFT_SIGNED (shift2))
2396 operands[2] = GEN_INT (shift2 + 1);
2397 gen_shifty_op (ASHIFT, operands);
2398 operands[2] = GEN_INT (1);
2399 gen_shifty_op (ASHIFTRT, operands);
2402 operands[2] = GEN_INT (shift2);
2403 gen_shifty_hi_op (ASHIFT, operands);
2407 operands[2] = GEN_INT (-shift2);
2408 gen_shifty_hi_op (LSHIFTRT, operands);
2410 emit_insn (size <= 8
2411 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2412 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2419 if (! rtx_equal_function_value_matters
2420 && ! reload_in_progress && ! reload_completed)
2421 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2425 operands[2] = GEN_INT (16 - insize);
2426 gen_shifty_hi_op (ASHIFT, operands);
2427 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2429 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2431 gen_ashift (ASHIFTRT, 1, dest);
2436 /* Don't expand fine-grained when combining, because that will
2437 make the pattern fail. */
2438 if (! rtx_equal_function_value_matters
2439 && ! reload_in_progress && ! reload_completed)
2441 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2442 emit_insn (gen_movsi (dest, source));
2445 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2446 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2447 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2449 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2450 gen_shifty_op (ASHIFT, operands);
2452 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2460 /* Prefix a symbol_ref name with "datalabel". */
2463 gen_datalabel_ref (sym)
2466 if (GET_CODE (sym) == LABEL_REF)
2467 return gen_rtx_CONST (GET_MODE (sym),
2468 gen_rtx_UNSPEC (GET_MODE (sym),
2472 if (GET_CODE (sym) != SYMBOL_REF)
2479 /* The SH cannot load a large constant into a register, constants have to
2480 come from a pc relative load. The reference of a pc relative load
2481 instruction must be less than 1k in front of the instruction. This
2482 means that we often have to dump a constant inside a function, and
2483 generate code to branch around it.
2485 It is important to minimize this, since the branches will slow things
2486 down and make things bigger.
2488 Worst case code looks like:
2506 We fix this by performing a scan before scheduling, which notices which
2507 instructions need to have their operands fetched from the constant table
2508 and builds the table.
2512 scan, find an instruction which needs a pcrel move. Look forward, find the
2513 last barrier which is within MAX_COUNT bytes of the requirement.
2514 If there isn't one, make one. Process all the instructions between
2515 the find and the barrier.
2517 In the above example, we can tell that L3 is within 1k of L1, so
2518 the first move can be shrunk from the 3 insn+constant sequence into
2519 just 1 insn, and the constant moved to L3 to make:
2530 Then the second move becomes the target for the shortening process. */
2534 rtx value; /* Value in table. */
2535 rtx label; /* Label of value. */
2536 rtx wend; /* End of window. */
2537 enum machine_mode mode; /* Mode of value. */
2539 /* True if this constant is accessed as part of a post-increment
2540 sequence. Note that HImode constants are never accessed in this way. */
2541 bool part_of_sequence_p;
2544 /* The maximum number of constants that can fit into one pool, since
2545 the pc relative range is 0...1020 bytes and constants are at least 4
bytes long. */
2548 #define MAX_POOL_SIZE (1020/4)
2549 static pool_node pool_vector[MAX_POOL_SIZE];
2550 static int pool_size;
2551 static rtx pool_window_label;
2552 static int pool_window_last;
2554 /* ??? If we need a constant in HImode which is the truncated value of a
2555 constant we need in SImode, we could combine the two entries thus saving
2556 two bytes. Is this common enough to be worth the effort of implementing
it? */
2559 /* ??? This stuff should be done at the same time that we shorten branches.
2560 As it is now, we must assume that all branches are the maximum size, and
2561 this causes us to almost always output constant pools sooner than
necessary. */
2564 /* Add a constant to the pool and return its label. */
2567 add_constant (x, mode, last_value)
2569 enum machine_mode mode;
2573 rtx lab, new, ref, newref;
2575 /* First see if we've already got it. */
2576 for (i = 0; i < pool_size; i++)
2578 if (x->code == pool_vector[i].value->code
2579 && mode == pool_vector[i].mode)
2581 if (x->code == CODE_LABEL)
2583 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2586 if (rtx_equal_p (x, pool_vector[i].value))
2591 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2593 new = gen_label_rtx ();
2594 LABEL_REFS (new) = pool_vector[i].label;
2595 pool_vector[i].label = lab = new;
2597 if (lab && pool_window_label)
2599 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2600 ref = pool_vector[pool_window_last].wend;
2601 LABEL_NEXTREF (newref) = ref;
2602 pool_vector[pool_window_last].wend = newref;
2605 pool_window_label = new;
2606 pool_window_last = i;
2612 /* Need a new one. */
2613 pool_vector[pool_size].value = x;
2614 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2617 pool_vector[pool_size - 1].part_of_sequence_p = true;
2620 lab = gen_label_rtx ();
2621 pool_vector[pool_size].mode = mode;
2622 pool_vector[pool_size].label = lab;
2623 pool_vector[pool_size].wend = NULL_RTX;
2624 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2625 if (lab && pool_window_label)
2627 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2628 ref = pool_vector[pool_window_last].wend;
2629 LABEL_NEXTREF (newref) = ref;
2630 pool_vector[pool_window_last].wend = newref;
2633 pool_window_label = lab;
2634 pool_window_last = pool_size;
2639 /* Output the literal table. */
2650 /* Do two passes, first time dump out the HI sized constants. */
/* First pass: walk every pool entry; HImode entries are emitted after SCAN
   as a consttable_2 insn.  */
2652 for (i = 0; i < pool_size; i++)
2654 pool_node *p = &pool_vector[i];
2656 if (p->mode == HImode)
2660 scan = emit_insn_after (gen_align_2 (), scan);
/* Emit every label chained to this entry through LABEL_REFS.  */
2663 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2664 scan = emit_label_after (lab, scan);
2665 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
/* Emit one consttable_window_end insn per LABEL_REF on the entry's WEND
   list.  */
2667 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2669 lab = XEXP (ref, 0);
2670 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2673 else if (p->mode == DFmode)
/* Variant taken when TARGET_FMOVD and TARGET_ALIGN_DOUBLE hold and HAVE_DF
   is set.  NOTE(review): HAVE_DF is presumably set by the DFmode test in
   the first pass -- confirm.  */
2679 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2681 rtx align_insn = NULL_RTX;
2683 scan = emit_label_after (gen_label_rtx (), scan);
2684 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2687 for (i = 0; i < pool_size; i++)
2689 pool_node *p = &pool_vector[i];
/* While an alignment insn is pending and this entry is not part of a
   sequence, the entry (labels, constant, window ends) is emitted in front
   of ALIGN_INSN, which is then deleted and forgotten.  */
2697 if (align_insn && !p->part_of_sequence_p)
2699 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2700 emit_label_before (lab, align_insn);
2701 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2703 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2705 lab = XEXP (ref, 0);
2706 emit_insn_before (gen_consttable_window_end (lab),
2709 delete_insn (align_insn);
2710 align_insn = NULL_RTX;
2715 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2716 scan = emit_label_after (lab, scan);
2717 scan = emit_insn_after (gen_consttable_4 (p->value,
/* NEED_ALIGN is toggled here, after a consttable_4 entry has been
   emitted.  */
2719 need_align = ! need_align;
2725 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2730 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2731 scan = emit_label_after (lab, scan);
2732 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
/* Window-end markers of HImode entries were already emitted in the first
   pass, so only the other modes are handled here.  */
2740 if (p->mode != HImode)
2742 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2744 lab = XEXP (ref, 0);
2745 scan = emit_insn_after (gen_consttable_window_end (lab),
/* Another walk over the whole pool; the entries emitted here are preceded
   by a fresh label and an align_4 insn.  */
2754 for (i = 0; i < pool_size; i++)
2756 pool_node *p = &pool_vector[i];
2767 scan = emit_label_after (gen_label_rtx (), scan);
2768 scan = emit_insn_after (gen_align_4 (), scan);
2770 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2771 scan = emit_label_after (lab, scan);
2772 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2780 scan = emit_label_after (gen_label_rtx (), scan);
2781 scan = emit_insn_after (gen_align_4 (), scan);
2783 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2784 scan = emit_label_after (lab, scan);
2785 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2793 if (p->mode != HImode)
2795 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2797 lab = XEXP (ref, 0);
2798 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Terminate the table with a consttable_end insn followed by a barrier.  */
2803 scan = emit_insn_after (gen_consttable_end (), scan);
2804 scan = emit_barrier_after (scan);
/* Reset the pool window bookkeeping.  */
2806 pool_window_label = NULL_RTX;
2807 pool_window_last = 0;
2810 /* Return nonzero if SRC is a CONST_INT whose value lies in the signed
16-bit range -32768..32767, i.e. if the constant would be an ok source for a
2811 mov.w instead of a mov.l. */
2817 return (GET_CODE (src) == CONST_INT
2818 && INTVAL (src) >= -32768
2819 && INTVAL (src) <= 32767);
2822 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2824 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2825 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2826 need to fix it if the input value is CONST_OK_FOR_I. */
2832 if (GET_CODE (insn) == INSN)
2834 rtx pat = PATTERN (insn);
/* For a PARALLEL, only its first element is examined.  */
2835 if (GET_CODE (pat) == PARALLEL)
2836 pat = XVECEXP (pat, 0, 0);
2837 if (GET_CODE (pat) == SET
2838 /* We can load any 8 bit value if we don't care what the high
2839 order bits end up as. */
2840 && GET_MODE (SET_DEST (pat)) != QImode
2841 && (CONSTANT_P (SET_SRC (pat))
2842 /* Match mova_const. */
2843 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2844 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2845 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2847 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2848 && (fp_zero_operand (SET_SRC (pat))
2849 || fp_one_operand (SET_SRC (pat)))
2850 /* ??? If this is a -m4 or -m4-single compilation, in general
2851 we don't know the current setting of fpscr, so disable fldi.
2852 There is an exception if this was a register-register move
2853 before reload - and hence it was ascertained that we have
2854 single precision setting - and in a post-reload optimization
2855 we changed this to do a constant load. In that case
2856 we don't have an r0 clobber, hence we must use fldi. */
2857 && (! TARGET_SH4 || TARGET_FMOVD
2858 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2860 && GET_CODE (SET_DEST (pat)) == REG
2861 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
/* The source must not be a CONST_INT that satisfies CONST_OK_FOR_I.  */
2862 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2863 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* Nonzero if INSN is an INSN whose pattern is a plain SET with an
   UNSPEC_MOVA source whose first operand is a LABEL_REF.  */
2874 return (GET_CODE (insn) == INSN
2875 && GET_CODE (PATTERN (insn)) == SET
2876 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2877 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2878 /* Don't match mova_const. */
2879 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2882 /* Find the last barrier from insn FROM which is close enough to hold the
2883 constant pool. If we can't find one, then create one near the end of
2887 find_barrier (num_mova, mova, from)
2898 int leading_mova = num_mova;
2899 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2903 /* For HImode: range is 510, add 4 because pc counts from address of
2904 second instruction after this one, subtract 2 for the jump instruction
2905 that we may need to emit before the table, subtract 2 for the instruction
2906 that fills the jump delay slot (in very rare cases, reorg will take an
2907 instruction from after the constant pool or will leave the delay slot
2908 empty). This gives 510.
2909 For SImode: range is 1020, add 4 because pc counts from address of
2910 second instruction after this one, subtract 2 in case pc is 2 byte
2911 aligned, subtract 2 for the jump instruction that we may need to emit
2912 before the table, subtract 2 for the instruction that fills the jump
2913 delay slot. This gives 1018. */
2915 /* The branch will always be shortened now that the reference address for
2916 forward branches is the successor address, thus we need no longer make
2917 adjustments to the [sh]i_limit for -O0. */
/* Scan forward until the insn chain ends or one of the two running counts
   reaches its limit.  */
2922 while (from && count_si < si_limit && count_hi < hi_limit)
2924 int inc = get_attr_length (from);
/* A label, or an insn directly after a barrier, may raise the alignment;
   both helpers return a log2 value, hence the shift.  */
2927 if (GET_CODE (from) == CODE_LABEL)
2930 new_align = 1 << label_to_alignment (from);
2931 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2932 new_align = 1 << barrier_align (from);
/* Remember the most recent barrier seen.  */
2938 if (GET_CODE (from) == BARRIER)
2941 found_barrier = from;
2943 /* If we are at the end of the function, or in front of an alignment
2944 instruction, we need not insert an extra alignment. We prefer
2945 this kind of barrier. */
2946 if (barrier_align (from) > 2)
2947 good_barrier = from;
2950 if (broken_move (from))
2953 enum machine_mode mode;
/* Pick the SET apart the same way broken_move does: for a PARALLEL, look
   at its first element.  */
2955 pat = PATTERN (from);
2956 if (GET_CODE (pat) == PARALLEL)
2957 pat = XVECEXP (pat, 0, 0);
2958 src = SET_SRC (pat);
2959 dst = SET_DEST (pat);
2960 mode = GET_MODE (dst);
2962 /* We must explicitly check the mode, because sometimes the
2963 front end will generate code to load unsigned constants into
2964 HImode targets without properly sign extending them. */
2966 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2969 /* We put the short constants before the long constants, so
2970 we must count the length of short constants in the range
2971 for the long constants. */
2972 /* ??? This isn't optimal, but is easy to do. */
2977 /* We dump DF/DI constants before SF/SI ones, because
2978 the limit is the same, but the alignment requirements
2979 are higher. We may waste up to 4 additional bytes
2980 for alignment, and the DF/DI constant may have
2981 another SF/SI constant placed before it. */
2982 if (TARGET_SHCOMPACT
2984 && (mode == DFmode || mode == DImode))
2989 while (si_align > 2 && found_si + si_align - 2 > count_si)
2991 if (found_si > count_si)
2992 count_si = found_si;
/* Account for the size of this constant.  */
2993 found_si += GET_MODE_SIZE (mode);
2995 si_limit -= GET_MODE_SIZE (mode);
2998 /* See the code in machine_dependent_reorg, which has a similar if
2999 statement that generates a new mova insn in many cases. */
3000 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
/* Record the best barrier known at this point, preferring GOOD_BARRIER
   over FOUND_BARRIER.  */
3010 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3012 if (found_si > count_si)
3013 count_si = found_si;
3015 else if (GET_CODE (from) == JUMP_INSN
3016 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3017 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3021 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3023 /* We have just passed the barrier in front of the
3024 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3025 the ADDR_DIFF_VEC is accessed as data, just like our pool
3026 constants, this is a good opportunity to accommodate what
3027 we have gathered so far.
3028 If we waited any longer, we could end up at a barrier in
3029 front of code, which gives worse cache usage for separated
3030 instruction / data caches. */
3031 good_barrier = found_barrier;
/* Length contributed by the table: number of elements of vector operand 1
   times the size of the table's mode.  */
3036 rtx body = PATTERN (from);
3037 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3040 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3041 else if (GET_CODE (from) == JUMP_INSN
3043 && ! TARGET_SMALLCODE)
/* When the alignment grows, the limit is reduced and the new alignment is
   remembered; the running count is rounded up to a multiple of
   NEW_ALIGN.  */
3049 if (new_align > si_align)
3051 si_limit -= (count_si - 1) & (new_align - si_align);
3052 si_align = new_align;
3054 count_si = (count_si + new_align - 1) & -new_align;
/* Same adjustment for the HImode counters.  */
3059 if (new_align > hi_align)
3061 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3062 hi_align = new_align;
3064 count_hi = (count_hi + new_align - 1) & -new_align;
3066 from = NEXT_INSN (from);
3073 /* Try as we might, the leading mova is out of range. Change
3074 it into a load (which will become a pcload) and retry. */
3075 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3076 INSN_CODE (mova) = -1;
3077 return find_barrier (0, 0, mova);
3081 /* Insert the constant pool table before the mova instruction,
3082 to prevent the mova label reference from going out of range. */
3084 good_barrier = found_barrier = barrier_before_mova;
3090 if (good_barrier && next_real_insn (found_barrier))
3091 found_barrier = good_barrier;
3095 /* We didn't find a barrier in time to dump our stuff,
3096 so we'll make one. */
3097 rtx label = gen_label_rtx ();
3099 /* If we exceeded the range, then we must back up over the last
3100 instruction we looked at. Otherwise, we just need to undo the
3101 NEXT_INSN at the end of the loop. */
3102 if (count_hi > hi_limit || count_si > si_limit)
3103 from = PREV_INSN (PREV_INSN (from));
3105 from = PREV_INSN (from);
3107 /* Walk back to be just before any jump or label.
3108 Putting it before a label reduces the number of times the branch
3109 around the constant pool table will be hit. Putting it before
3110 a jump makes it more likely that the bra delay slot will be
3112 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3113 || GET_CODE (from) == CODE_LABEL)
3114 from = PREV_INSN (from);
3116 from = emit_jump_insn_after (gen_jump (label), from);
3117 JUMP_LABEL (from) = label;
3118 LABEL_NUSES (label) = 1;
3119 found_barrier = emit_barrier_after (from);
3120 emit_label_after (label, found_barrier);
3123 return found_barrier;
3126 /* If the instruction INSN is implemented by a special function, and we can
3127 positively find the register that is used to call the sfunc, and this
3128 register is not used anywhere else in this instruction - except as the
3129 destination of a set, return this register; else, return 0. */
3131 sfunc_uses_reg (insn)
3135 rtx pattern, part, reg_part, reg;
3137 if (GET_CODE (insn) != INSN)
3139 pattern = PATTERN (insn);
/* Only PARALLEL patterns whose insn type attribute is TYPE_SFUNC
   qualify.  */
3140 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Search elements n-1 down to 1 of the PARALLEL (element 0 is not looked
   at) for a USE of an SImode operand.  */
3143 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3145 part = XVECEXP (pattern, 0, i);
3146 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3151 reg = XEXP (reg_part, 0);
/* Look for another mention of REG in every element other than REG_PART
   itself and CLOBBERs.  For a SET whose destination is a REG, only the
   source is searched.  */
3152 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3154 part = XVECEXP (pattern, 0, i);
3155 if (part == reg_part || GET_CODE (part) == CLOBBER)
3157 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3158 && GET_CODE (SET_DEST (part)) == REG)
3159 ? SET_SRC (part) : part)))
3165 /* See if the only way in which INSN uses REG is by calling it, or by
3166 setting it while calling it. Set *SET to a SET rtx if the register
3170 noncall_uses_reg (reg, insn, set)
3179 reg2 = sfunc_uses_reg (insn);
3180 if (reg2 && REGNO (reg2) == REGNO (reg))
3182 pattern = single_set (insn);
3184 && GET_CODE (SET_DEST (pattern)) == REG
3185 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3189 if (GET_CODE (insn) != CALL_INSN)
3191 /* We don't use rtx_equal_p because we don't care if the mode is
3193 pattern = single_set (insn);
3195 && GET_CODE (SET_DEST (pattern)) == REG
3196 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3202 par = PATTERN (insn);
3203 if (GET_CODE (par) == PARALLEL)
3204 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3206 part = XVECEXP (par, 0, i);
3207 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3210 return reg_mentioned_p (reg, SET_SRC (pattern));
3216 pattern = PATTERN (insn);
3218 if (GET_CODE (pattern) == PARALLEL)
3222 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3223 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3225 pattern = XVECEXP (pattern, 0, 0);
3228 if (GET_CODE (pattern) == SET)
3230 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3232 /* We don't use rtx_equal_p, because we don't care if the
3233 mode is different. */
3234 if (GET_CODE (SET_DEST (pattern)) != REG
3235 || REGNO (reg) != REGNO (SET_DEST (pattern)))
/* Continue the analysis with the source of the SET.  */
3241 pattern = SET_SRC (pattern);
/* Test whether what remains is anything other than a CALL through a MEM
   whose address is REG.  */
3244 if (GET_CODE (pattern) != CALL
3245 || GET_CODE (XEXP (pattern, 0)) != MEM
3246 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3252 /* Given X, a pattern of an insn or a part of it, return a mask of used
3253 general registers. Bits 0..15 mean that the respective registers
3254 are used as inputs in the instruction. Bits 16..31 mean that the
3255 registers 0..15, respectively, are used as outputs, or are clobbered.
3256 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3258 regs_used (x, is_dest)
3267 code = GET_CODE (x);
/* One bit for each of the HARD_REGNO_NREGS (0, mode) registers, shifted to
   X's register number; IS_DEST moves the bits into the output half.  */
3272 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3273 << (REGNO (x) + is_dest));
3277 rtx y = SUBREG_REG (x);
3279 if (GET_CODE (y) != REG)
/* Same mask as for a plain register, with the shift additionally taking
   subreg_regno_offset into account.  */
3282 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3284 subreg_regno_offset (REGNO (y),
3287 GET_MODE (x)) + is_dest));
/* The source contributes input bits, the destination output bits.  */
3291 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3293 /* If there was a return value, it must have been indicated with USE. */
/* Walk the operands of X according to its RTX format string, last operand
   first, recursing into vector elements and 'e' operands and OR-ing the
   resulting masks into USED.  */
3308 fmt = GET_RTX_FORMAT (code);
3310 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3315 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3316 used |= regs_used (XVECEXP (x, i, j), is_dest);
3318 else if (fmt[i] == 'e')
3319 used |= regs_used (XEXP (x, i), is_dest);
3324 /* Create an instruction that prevents redirection of a conditional branch
3325 to the destination of the JUMP with address ADDR.
3326 If the branch needs to be implemented as an indirect jump, try to find
3327 a scratch register for it.
3328 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3329 If any preceding insn that doesn't fit into a delay slot is good enough,
3330 pass 1. Pass 2 if a definite blocking insn is needed.
3331 -1 is used internally to avoid deep recursion.
3332 If a blocking instruction is made or recognized, return it. */
3335 gen_block_redirect (jump, addr, need_block)
3337 int addr, need_block;
3340 rtx prev = prev_nonnote_insn (jump);
3343 /* First, check if we already have an instruction that satisfies our need. */
3344 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3346 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3348 if (GET_CODE (PATTERN (prev)) == USE
3349 || GET_CODE (PATTERN (prev)) == CLOBBER
3350 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
/* Clears bit 0 of NEED_BLOCK as a side effect (1 becomes 0, 2 stays 2);
   the result is negative only when NEED_BLOCK was negative, such as the
   internal value -1.  */
3352 else if ((need_block &= ~1) < 0)
3354 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3357 /* We can't use JUMP_LABEL here because it might be undefined
3358 when not optimizing. */
3359 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3360 /* If the branch is out of range, try to find a scratch register for it. */
3362 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3366 /* Don't look for the stack pointer as a scratch register,
3367 it would cause trouble if an interrupt occurred. */
/* 0x7fff has bits 0..14 set, so bit 15 is left out of the candidate
   mask.  */
3368 unsigned try = 0x7fff, used;
3369 int jump_left = flag_expensive_optimizations + 1;
3371 /* It is likely that the most recent eligible instruction is wanted for
3372 the delay slot. Therefore, find out which registers it uses, and
3373 try to avoid using them. */
3375 for (scan = jump; (scan = PREV_INSN (scan)); )
3379 if (INSN_DELETED_P (scan))
3381 code = GET_CODE (scan);
3382 if (code == CODE_LABEL || code == JUMP_INSN)
3385 && GET_CODE (PATTERN (scan)) != USE
3386 && GET_CODE (PATTERN (scan)) != CLOBBER
3387 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
/* Remove the registers this insn uses from the candidate mask.  */
3389 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump's label, accumulating register usage.  */
3393 for (used = dead = 0, scan = JUMP_LABEL (jump);
3394 (scan = NEXT_INSN (scan)); )
3398 if (INSN_DELETED_P (scan))
3400 code = GET_CODE (scan);
3401 if (GET_RTX_CLASS (code) == 'i')
3403 used |= regs_used (PATTERN (scan), 0);
3404 if (code == CALL_INSN)
3405 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
/* Registers recorded as outputs (bits 16..31 of USED) whose input bit is
   not set in USED are added to DEAD.  */
3406 dead |= (used >> 16) & ~used;
3412 if (code == JUMP_INSN)
/* Follow a simple jump to its label while JUMP_LEFT is nonzero; JUMP_LEFT
   is decremented on every test.  */
3414 if (jump_left-- && simplejump_p (scan))
3415 scan = JUMP_LABEL (scan);
3421 /* Mask out the stack pointer again, in case it was
3422 the only 'free' register we have found. */
3425 /* If the immediate destination is still in range, check for possible
3426 threading with a jump beyond the delay slot insn.
3427 Don't check if we are called recursively; the jump has been or will be
3428 checked in a different invocation then. */
3430 else if (optimize && need_block >= 0)
3432 rtx next = next_active_insn (next_active_insn (dest));
3433 if (next && GET_CODE (next) == JUMP_INSN
3434 && GET_CODE (PATTERN (next)) == SET
3435 && recog_memoized (next) == CODE_FOR_jump_compact)
3437 dest = JUMP_LABEL (next);
3439 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
/* Recursive call; NEED_BLOCK is passed as -1 (see the function comment).  */
3441 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* dead & -dead isolates the lowest set bit of DEAD; exact_log2 turns it
   into the number of the register used as scratch.  */
3447 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3449 /* It would be nice if we could convert the jump into an indirect
3450 jump / far branch right now, and thus exposing all constituent
3451 instructions to further optimization. However, reorg uses
3452 simplejump_p to determine if there is an unconditional jump where
3453 it should try to schedule instructions from the target of the
3454 branch; simplejump_p fails for indirect jumps even if they have
3456 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3457 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3459 /* ??? We would like this to have the scope of the jump, but that
3460 scope will change when a delay slot insn of an inner scope is added.
3461 Hence, after delay slot scheduling, we'll have to expect
3462 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3465 INSN_SCOPE (insn) = INSN_SCOPE (jump);
3466 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3469 else if (need_block)
3470 /* We can't use JUMP_LABEL here because it might be undefined
3471 when not optimizing. */
3472 return emit_insn_before (gen_block_branch_redirect
3473 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
/* Lower and upper bound used for conditional jumps.
   NOTE(review): presumably the displacement range a conditional branch can
   reach -- confirm at the uses of these macros.  */
3478 #define CONDJUMP_MIN -252
3479 #define CONDJUMP_MAX 262
3482 /* A label (to be placed) in front of the jump
3483 that jumps to our ultimate destination. */
3485 /* Where we are going to insert it if we cannot move the jump any farther,
3486 or the jump itself if we have picked up an existing jump. */
3488 /* The ultimate destination. */
/* Link to another far_branch record.  */
3490 struct far_branch *prev;
3491 /* If the branch has already been created, its address;
3492 else the address of its first prospective user. */
/* Forward declaration of gen_far_branch.  */
3496 static void gen_far_branch PARAMS ((struct far_branch *));
/* Current phase of the machine dependent reorg pass; it is assigned in
   machine_dependent_reorg and compared against SH_FIXUP_PCLOAD in
   barrier_align.  */
3497 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Emit the far branch described by BP.  Everything is emitted directly
   after BP->insert_place (INSN below): a fresh label, an unconditional
   jump to BP->far_label or a return insn, and BP->near_label.  INSN itself
   is then inverted so that it branches to the fresh label.  */
3500 struct far_branch *bp;
3502 rtx insn = bp->insert_place;
3504 rtx label = gen_label_rtx ();
3506 emit_label_after (label, insn);
3509 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3510 LABEL_NUSES (bp->far_label)++;
3513 jump = emit_jump_insn_after (gen_return (), insn);
3514 /* Emit a barrier so that reorg knows that any following instructions
3515 are not reachable via a fall-through path.
3516 But don't do this when not optimizing, since we wouldn't suppress the
3517 alignment for the barrier then, and could end up with out-of-range
3518 pc-relative loads. */
3520 emit_barrier_after (jump);
/* BP->near_label is emitted last, so it ends up directly after INSN and in
   front of the new jump.  */
3521 emit_label_after (bp->near_label, insn);
3522 JUMP_LABEL (jump) = bp->far_label;
3523 if (! invert_jump (insn, label, 1))
3526 (gen_stuff_delay_slot
3527 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3528 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3530 /* Prevent reorg from undoing our splits. */
/* Note that BP->address is advanced by 2 as a side effect of this call.  */
3531 gen_block_redirect (jump, bp->address += 2, 2);
3534 /* Fix up ADDR_DIFF_VECs. */
3536 fixup_addr_diff_vecs (first)
/* Visit every insn starting at FIRST; only JUMP_INSNs whose pattern is an
   ADDR_DIFF_VEC are of interest.  */
3541 for (insn = first; insn; insn = NEXT_INSN (insn))
3543 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3545 if (GET_CODE (insn) != JUMP_INSN
3546 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3548 pat = PATTERN (insn);
/* The label the vector is currently relative to.  */
3549 vec_lab = XEXP (XEXP (pat, 0), 0);
3551 /* Search the matching casesi_jump_2. */
/* Walk backwards from VEC_LAB looking for a JUMP_INSN whose pattern is a
   two-element PARALLEL with a USE as second element that refers to
   VEC_LAB through a LABEL_REF.  */
3552 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3554 if (GET_CODE (prev) != JUMP_INSN)
3556 prevpat = PATTERN (prev);
3557 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3559 x = XVECEXP (prevpat, 0, 1);
3560 if (GET_CODE (x) != USE)
3563 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3567 /* Emit the reference label of the braf where it belongs, right after
3568 the casesi_jump_2 (i.e. braf). */
3569 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3570 emit_label_after (braf_label, prev);
3572 /* Fix up the ADDR_DIFF_VEC to be relative
3573 to the reference address of the braf. */
3574 XEXP (XEXP (pat, 0), 0) = braf_label;
3578 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3579 a barrier. Return the base 2 logarithm of the desired alignment. */
3581 barrier_align (barrier_or_label)
3582 rtx barrier_or_label;
3584 rtx next = next_real_insn (barrier_or_label), pat, prev;
3585 int slot, credit, jump_to_next = 0;
3590 pat = PATTERN (next);
3592 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3595 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3596 /* This is a barrier in front of a constant table. */
3599 prev = prev_real_insn (barrier_or_label);
3600 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3602 pat = PATTERN (prev);
3603 /* If this is a very small table, we want to keep the alignment after
3604 the table to the minimum for proper code alignment.  A table counts
as very small when its number of entries times the size of its mode
does not exceed 1 << (CACHE_LOG - 2); TARGET_SMALLCODE gets the same
treatment.  Otherwise align_jumps_log is used. */
3605 return ((TARGET_SMALLCODE
3606 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3607 <= (unsigned)1 << (CACHE_LOG - 2)))
3608 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3611 if (TARGET_SMALLCODE)
/* Without TARGET_SH2, or when not optimizing, always use
   align_jumps_log.  */
3614 if (! TARGET_SH2 || ! optimize)
3615 return align_jumps_log;
3617 /* When fixing up pcloads, a constant table might be inserted just before
3618 the basic block that ends with the barrier. Thus, we can't trust the
3619 instruction lengths before that. */
3620 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3622 /* Check if there is an immediately preceding branch to the insn beyond
3623 the barrier. We must weigh the cost of discarding useful information
3624 from the current cache line when executing this branch and there is
3625 an alignment, against that of fetching unneeded insn in front of the
3626 branch target when there is no alignment. */
3628 /* There are two delay_slot cases to consider. One is the simple case
3629 where the preceding branch is to the insn beyond the barrier (simple
3630 delay slot filling), and the other is where the preceding branch has
3631 a delay slot that is a duplicate of the insn after the barrier
3632 (fill_eager_delay_slots) and the branch is to the insn after the insn
3633 after the barrier. */
3635 /* PREV is presumed to be the JUMP_INSN for the barrier under
3636 investigation. Skip to the insn before it. */
3637 prev = prev_real_insn (prev);
/* Walk backwards over plain INSNs.  CREDIT starts at
   (1 << (CACHE_LOG - 2)) + 2 and the walk stops once it becomes
   negative.  */
3639 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3640 credit >= 0 && prev && GET_CODE (prev) == INSN;
3641 prev = prev_real_insn (prev))
3644 if (GET_CODE (PATTERN (prev)) == USE
3645 || GET_CODE (PATTERN (prev)) == CLOBBER)
3647 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
/* Continue with element 1 of the SEQUENCE.  */
3649 prev = XVECEXP (PATTERN (prev), 0, 1);
3650 if (INSN_UID (prev) == INSN_UID (next))
3652 /* Delay slot was filled with insn at jump target. */
3659 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
/* Charge the length of this insn against the credit.  */
3661 credit -= get_attr_length (prev);
3664 && GET_CODE (prev) == JUMP_INSN
3665 && JUMP_LABEL (prev))
3669 || next_real_insn (JUMP_LABEL (prev)) == next
3670 /* If relax_delay_slots() decides NEXT was redundant
3671 with some previous instruction, it will have
3672 redirected PREV's jump to the following insn. */
3673 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3674 /* There is no upper bound on redundant instructions
3675 that might have been skipped, but we must not put an
3676 alignment where none had been before. */
3677 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3679 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3680 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3681 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3683 rtx pat = PATTERN (prev);
3684 if (GET_CODE (pat) == PARALLEL)
3685 pat = XVECEXP (pat, 0, 0);
/* The remaining credit, less SLOT, must reach 2 when the source of the SET
   is PC, and 0 otherwise.  */
3686 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
/* Default when no earlier return was taken.  */
3692 return align_jumps_log;
3695 /* If we are inside a phony loop, almost any kind of label can turn up as the
3696 first one in the loop. Aligning a braf label causes incorrect switch
3697 destination addresses; we can detect braf labels because they are
3698 followed by a BARRIER.
3699 Applying loop alignment to small constant or switch tables is a waste
3700 of space, so we suppress this too. */
3702 sh_loop_align (label)
/* Advance NEXT with next_nonnote_insn, repeating for as long as it lands on
   another CODE_LABEL.  */
3708 next = next_nonnote_insn (next);
3709 while (next && GET_CODE (next) == CODE_LABEL);
/* An ADDR_DIFF_VEC or a consttable_2 insn at this position is among the
   cases tested here.  */
3713 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3714 || recog_memoized (next) == CODE_FOR_consttable_2)
/* Otherwise use the regular loop alignment.  */
3717 return align_loops_log;
3720 /* Exported to toplev.c.
3722 Do a final pass over the function, just before delayed branch
3726 machine_dependent_reorg (first)
3729 rtx insn, mova = NULL_RTX;
3731 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3732 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3734 /* We must split call insns before introducing `mova's. If we're
3735 optimizing, they'll have already been split. Otherwise, make
3736 sure we don't split them too late. */
3738 split_all_insns_noflow ();
3743 /* If relaxing, generate pseudo-ops to associate function calls with
3744 the symbols they call. It does no harm to not generate these
3745 pseudo-ops. However, when we can generate them, it enables the
3746 linker to potentially relax the jsr to a bsr, and eliminate the
3747 register load and, possibly, the constant pool entry. */
3749 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3752 /* Remove all REG_LABEL notes. We want to use them for our own
3753 purposes. This works because none of the remaining passes
3754 need to look at them.
3756 ??? But it may break in the future. We should use a machine
3757 dependent REG_NOTE, or some other approach entirely. */
3758 for (insn = first; insn; insn = NEXT_INSN (insn))
3764 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3765 remove_note (insn, note);
3769 for (insn = first; insn; insn = NEXT_INSN (insn))
3771 rtx pattern, reg, link, set, scan, dies, label;
3772 int rescan = 0, foundinsn = 0;
3774 if (GET_CODE (insn) == CALL_INSN)
3776 pattern = PATTERN (insn);
3778 if (GET_CODE (pattern) == PARALLEL)
3779 pattern = XVECEXP (pattern, 0, 0);
3780 if (GET_CODE (pattern) == SET)
3781 pattern = SET_SRC (pattern);
3783 if (GET_CODE (pattern) != CALL
3784 || GET_CODE (XEXP (pattern, 0)) != MEM)
3787 reg = XEXP (XEXP (pattern, 0), 0);
3791 reg = sfunc_uses_reg (insn);
3796 if (GET_CODE (reg) != REG)
3799 /* This is a function call via REG. If the only uses of REG
3800 between the time that it is set and the time that it dies
3801 are in function calls, then we can associate all the
3802 function calls with the setting of REG. */
3804 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3806 if (REG_NOTE_KIND (link) != 0)
3808 set = single_set (XEXP (link, 0));
3809 if (set && rtx_equal_p (reg, SET_DEST (set)))
3811 link = XEXP (link, 0);
3818 /* ??? Sometimes global register allocation will have
3819 deleted the insn pointed to by LOG_LINKS. Try
3820 scanning backward to find where the register is set. */
3821 for (scan = PREV_INSN (insn);
3822 scan && GET_CODE (scan) != CODE_LABEL;
3823 scan = PREV_INSN (scan))
3825 if (! INSN_P (scan))
3828 if (! reg_mentioned_p (reg, scan))
3831 if (noncall_uses_reg (reg, scan, &set))
3845 /* The register is set at LINK. */
3847 /* We can only optimize the function call if the register is
3848 being set to a symbol. In theory, we could sometimes
3849 optimize calls to a constant location, but the assembler
3850 and linker do not support that at present. */
3851 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3852 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3855 /* Scan forward from LINK to the place where REG dies, and
3856 make sure that the only insns which use REG are
3857 themselves function calls. */
3859 /* ??? This doesn't work for call targets that were allocated
3860 by reload, since there may not be a REG_DEAD note for the
3864 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3868 /* Don't try to trace forward past a CODE_LABEL if we haven't
3869 seen INSN yet. Ordinarily, we will only find the setting insn
3870 in LOG_LINKS if it is in the same basic block. However,
3871 cross-jumping can insert code labels in between the load and
3872 the call, and can result in situations where a single call
3873 insn may have two targets depending on where we came from. */
3875 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3878 if (! INSN_P (scan))
3881 /* Don't try to trace forward past a JUMP. To optimize
3882 safely, we would have to check that all the
3883 instructions at the jump destination did not use REG. */
3885 if (GET_CODE (scan) == JUMP_INSN)
3888 if (! reg_mentioned_p (reg, scan))
3891 if (noncall_uses_reg (reg, scan, &scanset))
3898 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3900 /* There is a function call to this register other
3901 than the one we are checking. If we optimize
3902 this call, we need to rescan again below. */
3906 /* ??? We shouldn't have to worry about SCANSET here.
3907 We should just be able to check for a REG_DEAD note
3908 on a function call. However, the REG_DEAD notes are
3909 apparently not dependable around libcalls; c-torture
3910 execute/920501-2 is a test case. If SCANSET is set,
3911 then this insn sets the register, so it must have
3912 died earlier. Unfortunately, this will only handle
3913 the cases in which the register is, in fact, set in a
3916 /* ??? We shouldn't have to use FOUNDINSN here.
3917 However, the LOG_LINKS fields are apparently not
3918 entirely reliable around libcalls;
3919 newlib/libm/math/e_pow.c is a test case. Sometimes
3920 an insn will appear in LOG_LINKS even though it is
3921 not the most recent insn which sets the register. */
3925 || find_reg_note (scan, REG_DEAD, reg)))
3934 /* Either there was a branch, or some insn used REG
3935 other than as a function call address. */
3939 /* Create a code label, and put it in a REG_LABEL note on
3940 the insn which sets the register, and on each call insn
3941 which uses the register. In final_prescan_insn we look
3942 for the REG_LABEL notes, and output the appropriate label
3945 label = gen_label_rtx ();
3946 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3948 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3957 scan = NEXT_INSN (scan);
3959 && ((GET_CODE (scan) == CALL_INSN
3960 && reg_mentioned_p (reg, scan))
3961 || ((reg2 = sfunc_uses_reg (scan))
3962 && REGNO (reg2) == REGNO (reg))))
3964 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3966 while (scan != dies);
3972 fixup_addr_diff_vecs (first);
3976 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3977 shorten_branches (first);
3979 /* Scan the function looking for move instructions which have to be
3980 changed to pc-relative loads and insert the literal tables. */
3982 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3983 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3990 else if (GET_CODE (insn) == JUMP_INSN
3991 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3999 /* Some code might have been inserted between the mova and
4000 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4001 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4002 total += get_attr_length (scan);
4004 /* range of mova is 1020, add 4 because pc counts from address of
4005 second instruction after this one, subtract 2 in case pc is 2
4006 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4007 cancels out with alignment effects of the mova itself. */
4010 /* Change the mova into a load, and restart scanning
4011 there. broken_move will then return true for mova. */
4012 SET_SRC (PATTERN (mova))
4013 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4014 INSN_CODE (mova) = -1;
4018 if (broken_move (insn))
4021 /* Scan ahead looking for a barrier to stick the constant table
4023 rtx barrier = find_barrier (num_mova, mova, insn);
4024 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4026 if (num_mova && ! mova_p (mova))
4028 /* find_barrier had to change the first mova into a
4029 pcload; thus, we have to start with this new pcload. */
4033 /* Now find all the moves between the points and modify them. */
4034 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4036 if (GET_CODE (scan) == CODE_LABEL)
4038 if (broken_move (scan))
4040 rtx *patp = &PATTERN (scan), pat = *patp;
4044 enum machine_mode mode;
4046 if (GET_CODE (pat) == PARALLEL)
4047 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4048 src = SET_SRC (pat);
4049 dst = SET_DEST (pat);
4050 mode = GET_MODE (dst);
4052 if (mode == SImode && hi_const (src)
4053 && REGNO (dst) != FPUL_REG)
4058 while (GET_CODE (dst) == SUBREG)
4060 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4061 GET_MODE (SUBREG_REG (dst)),
4064 dst = SUBREG_REG (dst);
4066 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4069 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4071 /* This must be an insn that clobbers r0. */
4072 rtx clobber = XVECEXP (PATTERN (scan), 0,
4073 XVECLEN (PATTERN (scan), 0) - 1);
4075 if (GET_CODE (clobber) != CLOBBER
4076 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4080 && reg_set_between_p (r0_rtx, last_float_move, scan))
4084 && GET_MODE_SIZE (mode) != 4
4085 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4087 lab = add_constant (src, mode, last_float);
4089 emit_insn_before (gen_mova (lab), scan);
4092 /* There will be a REG_UNUSED note for r0 on
4093 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4094 lest reorg:mark_target_live_regs will not
4095 consider r0 to be used, and we end up with delay
4096 slot insn in front of SCAN that clobbers r0. */
4098 = find_regno_note (last_float_move, REG_UNUSED, 0);
4100 /* If we are not optimizing, then there may not be
4103 PUT_MODE (note, REG_INC);
4105 *last_float_addr = r0_inc_rtx;
4107 last_float_move = scan;
4109 newsrc = gen_rtx (MEM, mode,
4110 (((TARGET_SH4 && ! TARGET_FMOVD)
4111 || REGNO (dst) == FPUL_REG)
4114 last_float_addr = &XEXP (newsrc, 0);
4116 /* Remove the clobber of r0. */
4117 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4118 RTX_UNCHANGING_P (newsrc) = 1;
4120 /* This is a mova needing a label. Create it. */
4121 else if (GET_CODE (src) == UNSPEC
4122 && XINT (src, 1) == UNSPEC_MOVA
4123 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4125 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4126 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4127 newsrc = gen_rtx_UNSPEC (SImode,
4128 gen_rtvec (1, newsrc),
4133 lab = add_constant (src, mode, 0);
4134 newsrc = gen_rtx_MEM (mode,
4135 gen_rtx_LABEL_REF (VOIDmode, lab));
4136 RTX_UNCHANGING_P (newsrc) = 1;
4138 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4139 INSN_CODE (scan) = -1;
4142 dump_table (barrier);
4147 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4148 INSN_ADDRESSES_FREE ();
4149 split_branches (first);
4151 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4152 also has an effect on the register that holds the address of the sfunc.
4153 Insert an extra dummy insn in front of each sfunc that pretends to
4154 use this register. */
4155 if (flag_delayed_branch)
4157 for (insn = first; insn; insn = NEXT_INSN (insn))
4159 rtx reg = sfunc_uses_reg (insn);
4163 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4167 /* fpscr is not actually a user variable, but we pretend it is for the
4168 sake of the previous optimization passes, since we want it handled like
4169 one. However, we don't have any debugging information for it, so turn
4170 it into a non-user variable now. */
4172 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4174 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the uid to use for the destination of LABEL when indexing
   per-uid tables that have MAX_UID entries (split_branches indexes its
   uid_branch array with the result).  The search starts at
   next_real_insn (LABEL).
   NOTE(review): this listing omits several lines of the function -- the
   return type, the parameter and local declarations, the statements
   guarded by the undefined-label and RETURN tests, and the final
   return -- so the values produced in those cases cannot be read off
   from here.  */
4178 get_dest_uid (label, max_uid)
4182 rtx dest = next_real_insn (label);
4185 /* This can happen for an undefined label. */
4187 dest_uid = INSN_UID (dest);
4188 /* If this is a newly created branch redirection blocking instruction,
4189 we cannot index the branch_uid or insn_addresses arrays with its
4190 uid. But then, we won't need to, because the actual destination is
4191 the following branch. */
4192 while (dest_uid >= max_uid)
4194 dest = NEXT_INSN (dest);
4195 dest_uid = INSN_UID (dest);
/* A destination that is itself a RETURN jump is singled out here; the
   statement guarded by this test is not visible in this listing.  */
4197 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4202 /* Split condbranches that are out of range. Also add clobbers for
4203 scratch registers that are needed in far jumps.
4204 We do this before delay slot scheduling, so that it can take our
4205 newly created instructions into account. It also allows us to
4206 find branches with common targets more easily. */
/* FIRST is the first insn of the chain to walk.  Visible structure:
     1. shorten_branches (FIRST) is run so that get_attr_length and
        INSN_ADDRESSES can be consulted below.
     2. A table uid_branch, indexed by the uid returned from
        get_dest_uid, maps each branch destination to a far_branch
        record; records are also chained through ->prev on
        far_branch_list.
     3. Each insn is visited once: deleted insns are turned into
        NOTE_INSN_DELETED notes, and jump insns whose pattern is a SET
        or a RETURN are dispatched on get_attr_type (TYPE_CBRANCH, or
        TYPE_JUMP / TYPE_RETURN).
     4. Pending far branches are generated, far-label use counts taken
        above are dropped again, and init_insn_lengths is called.
   NOTE(review): many lines of this function are missing from this
   listing (braces, declarations and several guarded statements), so
   the exact nesting of the branches below should be checked against a
   complete copy before changing any of them.  */
4209 split_branches (first)
4213 struct far_branch **uid_branch, *far_branch_list = 0;
4214 int max_uid = get_max_uid ();
4216 /* Find out which branches are out of range. */
4217 shorten_branches (first);
/* One slot per insn uid that existed on entry, zero-filled so that a
   destination not seen yet reads back as a null far_branch pointer.
   The table comes from alloca, as do the far_branch records below, so
   nothing here is freed explicitly.  */
4219 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4220 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4222 for (insn = first; insn; insn = NEXT_INSN (insn))
4223 if (! INSN_P (insn))
4225 else if (INSN_DELETED_P (insn))
4227 /* Shorten_branches would split this instruction again,
4228 so transform it into a note. */
4229 PUT_CODE (insn, NOTE);
4230 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4231 NOTE_SOURCE_FILE (insn) = 0;
4233 else if (GET_CODE (insn) == JUMP_INSN
4234 /* Don't mess with ADDR_DIFF_VEC */
4235 && (GET_CODE (PATTERN (insn)) == SET
4236 || GET_CODE (PATTERN (insn)) == RETURN))
4238 enum attr_type type = get_attr_type (insn);
/* Conditional branches.  A length above 4 is handled by redirecting
   the branch to a near label associated with a far_branch record; the
   length-2 case further down only looks for jump_compact insns that
   need a block redirect.  */
4239 if (type == TYPE_CBRANCH)
4243 if (get_attr_length (insn) > 4)
4245 rtx src = SET_SRC (PATTERN (insn));
4246 rtx olabel = XEXP (XEXP (src, 1), 0);
4247 int addr = INSN_ADDRESSES (INSN_UID (insn));
4249 int dest_uid = get_dest_uid (olabel, max_uid);
4250 struct far_branch *bp = uid_branch[dest_uid];
4252 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4253 the label if the LABEL_NUSES count drops to zero. There is
4254 always a jump_optimize pass that sets these values, but it
4255 proceeds to delete unreferenced code, and then if not
4256 optimizing, to un-delete the deleted instructions, thus
4257 leaving labels with too low uses counts. */
4260 JUMP_LABEL (insn) = olabel;
4261 LABEL_NUSES (olabel)++;
4265 bp = (struct far_branch *) alloca (sizeof *bp);
4266 uid_branch[dest_uid] = bp;
4267 bp->prev = far_branch_list;
4268 far_branch_list = bp;
4270 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4271 LABEL_NUSES (bp->far_label)++;
4275 label = bp->near_label;
4276 if (! label && bp->address - addr >= CONDJUMP_MIN)
4278 rtx block = bp->insert_place;
4280 if (GET_CODE (PATTERN (block)) == RETURN)
4281 block = PREV_INSN (block);
4283 block = gen_block_redirect (block,
4285 label = emit_label_after (gen_label_rtx (),
4287 bp->near_label = label;
4289 else if (label && ! NEXT_INSN (label))
4291 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4292 bp->insert_place = insn;
4294 gen_far_branch (bp);
4298 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4300 bp->near_label = label = gen_label_rtx ();
4301 bp->insert_place = insn;
4304 if (! redirect_jump (insn, label, 1))
4309 /* get_attr_length (insn) == 2 */
4310 /* Check if we have a pattern where reorg wants to redirect
4311 the branch to a label from an unconditional branch that
4313 /* We can't use JUMP_LABEL here because it might be undefined
4314 when not optimizing. */
4315 /* A syntax error might cause beyond to be NULL_RTX. */
4317 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4321 && (GET_CODE (beyond) == JUMP_INSN
4322 || ((beyond = next_active_insn (beyond))
4323 && GET_CODE (beyond) == JUMP_INSN))
4324 && GET_CODE (PATTERN (beyond)) == SET
4325 && recog_memoized (beyond) == CODE_FOR_jump_compact
4327 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4328 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4330 gen_block_redirect (beyond,
4331 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4334 next = next_active_insn (insn);
4336 if ((GET_CODE (next) == JUMP_INSN
4337 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4338 && GET_CODE (PATTERN (next)) == SET
4339 && recog_memoized (next) == CODE_FOR_jump_compact
4341 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4342 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4344 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
/* Unconditional jumps and returns.  Only TYPE_JUMP has a label operand
   to look up; the value used for dest_uid in the TYPE_RETURN case is
   set on a line that is not visible in this listing.  */
4346 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4348 int addr = INSN_ADDRESSES (INSN_UID (insn));
4351 struct far_branch *bp;
4353 if (type == TYPE_JUMP)
4355 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4356 dest_uid = get_dest_uid (far_label, max_uid);
4359 /* Parse errors can lead to labels outside
4361 if (! NEXT_INSN (far_label))
4366 JUMP_LABEL (insn) = far_label;
4367 LABEL_NUSES (far_label)++;
4369 redirect_jump (insn, NULL_RTX, 1);
4373 bp = uid_branch[dest_uid];
4376 bp = (struct far_branch *) alloca (sizeof *bp);
4377 uid_branch[dest_uid] = bp;
4378 bp->prev = far_branch_list;
4379 far_branch_list = bp;
4381 bp->far_label = far_label;
4383 LABEL_NUSES (far_label)++;
4385 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4386 if (addr - bp->address <= CONDJUMP_MAX)
4387 emit_label_after (bp->near_label, PREV_INSN (insn));
4390 gen_far_branch (bp);
4396 bp->insert_place = insn;
4398 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4400 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4403 /* Generate all pending far branches,
4404 and free our references to the far labels. */
4405 while (far_branch_list)
4407 if (far_branch_list->near_label
4408 && ! NEXT_INSN (far_branch_list->near_label))
4409 gen_far_branch (far_branch_list);
4411 && far_branch_list->far_label
4412 && ! --LABEL_NUSES (far_branch_list->far_label))
4413 delete_insn (far_branch_list->far_label);
4414 far_branch_list = far_branch_list->prev;
4417 /* Instruction length information is no longer valid due to the new
4418 instructions that have been generated. */
4419 init_insn_lengths ();
4422 /* Dump out instruction addresses, which is useful for debugging the
4423 constant pool table stuff.
4425 If relaxing, output the label and pseudo-ops used to link together
4426 calls and the instruction which set the registers. */
4428 /* ??? The addresses printed by this routine for insns are nonsense for
4429 insns which are inside of a sequence where none of the inner insns have
4430 variable length. This is because the second pass of shorten_branches
4431 does not bother to update them. */
/* INSN is the insn about to be output; OPVEC and NOPERANDS are unused.
   Visible behaviour:
     - under TARGET_DUMPISIZE, an assembler comment "! at <addr>" is
       written using INSN_ADDRESSES for INSN;
     - a REG_LABEL note on INSN is looked up; for a call (a CALL
       pattern, a SET whose source is a CALL, or an insn of type
       TYPE_SFUNC) a ".uses" directive naming the note's label is
       written, and for any other SET the label itself is emitted via
       the internal_label target hook.
   For a PARALLEL only the first element is inspected.
   NOTE(review): the conditions that guard the REG_LABEL handling (the
   "if relaxing" case described above and the test that a note was
   found) are on lines missing from this listing.  */
4434 final_prescan_insn (insn, opvec, noperands)
4436 rtx *opvec ATTRIBUTE_UNUSED;
4437 int noperands ATTRIBUTE_UNUSED;
4439 if (TARGET_DUMPISIZE)
4440 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4446 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4451 pattern = PATTERN (insn);
4452 if (GET_CODE (pattern) == PARALLEL)
4453 pattern = XVECEXP (pattern, 0, 0);
4454 if (GET_CODE (pattern) == CALL
4455 || (GET_CODE (pattern) == SET
4456 && (GET_CODE (SET_SRC (pattern)) == CALL
4457 || get_attr_type (insn) == TYPE_SFUNC)))
4458 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4459 CODE_LABEL_NUMBER (XEXP (note, 0)));
4460 else if (GET_CODE (pattern) == SET)
4461 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4462 CODE_LABEL_NUMBER (XEXP (note, 0)));
/* NOTE(review): the original header comment that follows is truncated
   in this listing (its last line and terminator are missing), as are
   the function's return type, its local declarations, whatever test
   guards the output, and its return statement.  What remains visible:
   a ".align 2" directive is written, then for each of the pool_size
   entries of pool_vector an internal label "L<n>" (from the entry's
   label) followed by a ".long" of the entry's value.  */
4469 /* Dump out any constants accumulated in the final pass. These will
4473 output_jump_label_table ()
4479 fprintf (asm_out_file, "\t.align 2\n");
4480 for (i = 0; i < pool_size; i++)
4482 pool_node *p = &pool_vector[i];
4484 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4485 CODE_LABEL_NUMBER (p->label));
4486 output_asm_insn (".long %O0", &p->value);
4494 /* A full frame looks like:
4498 [ if current_function_anonymous_args
4511 local-0 <- fp points here. */
4513 /* Number of bytes pushed for anonymous args, used to pass information
4514 between expand_prologue and expand_epilogue. */
/* The reader visible in this file is sh_expand_epilogue, which adds this
   value to its final stack-pointer adjustment.  The place where it is
   written is not visible in this listing.  */
4516 static int extra_push;
4518 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4519 to be adjusted, and TEMP, if nonnegative, holds the register number
4520 of a general register that we may clobber. */
/* EMIT_FN is the function used to emit each insn that modifies REG.
   When it is frame_insn, the register-based adjustment below also gets
   a REG_FRAME_RELATED_EXPR note describing it as REG = REG + SIZE.
   Three strategies are tried in order:
     1. a single add-immediate when SIZE satisfies CONST_OK_FOR_ADD;
     2. two add-immediates, the first being SIZE / 2 rounded down to a
        multiple of the stack alignment;
     3. loading the constant into register TEMP and adding or
        subtracting that register.
   NOTE(review): the parameter declarations other than EMIT_FN, the
   test on SIZE that selects the subtract form, and the TEMP validity
   check are on lines missing from this listing.  */
4523 output_stack_adjust (size, reg, temp, emit_fn)
4527 rtx (*emit_fn) PARAMS ((rtx));
4531 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4536 if (CONST_OK_FOR_ADD (size))
4537 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4538 /* Try to do it with two partial adjustments; however, we must make
4539 sure that the stack is properly aligned at all times, in case
4540 an interrupt occurs between the two partial adjustments. */
4541 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4542 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4544 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4545 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4552 /* If TEMP is invalid, we could temporarily save a general
4553 register to MACL. However, there is currently no need
4554 to handle this case, so just abort when we see it. */
4557 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4559 /* If SIZE is negative, subtract the positive value.
4560 This sometimes allows a constant pool entry to be shared
4561 between prologue and epilogue code. */
/* In both arms the constant load goes through emit_insn rather than
   EMIT_FN; only the insn that actually changes REG is passed to
   EMIT_FN.  */
4564 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4565 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4569 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4570 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
/* NOTE(review): the PLUS in the note below is built in SImode, whereas
   the adjustment itself uses GET_MODE (reg) and GEN_ADD3; confirm that
   this is intended when REG is wider than SImode.  */
4572 if (emit_fn == frame_insn)
4574 = (gen_rtx_EXPR_LIST
4575 (REG_FRAME_RELATED_EXPR,
4576 gen_rtx_SET (VOIDmode, reg,
4577 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
/* NOTE(review): X is not a parameter of output_stack_adjust.  The line
   below appears to be the remnant of a separate helper (presumably the
   frame_insn compared against above) whose header is missing from this
   listing.  */
4588 RTX_FRAME_RELATED_P (x) = 1;
4592 /* Output RTL to push register RN onto the stack. */
/* The push pattern is chosen from the kind of register RN:
     - gen_push_fpul (the test selecting it is on a missing line);
     - gen_push_fpscr for FPSCR_REG;
     - gen_push_4 on a DFmode register when TARGET_SH4 and TARGET_FMOVD
       are set, TARGET_FPU_SINGLE is clear, and RN is an FP or XD
       register; odd-numbered FP registers are tested for first, and the
       statement taken in that case is not visible here;
     - gen_push_e on an SFmode register for FP registers under
       TARGET_SH2E;
     - gen_push on an SImode register otherwise.
   Afterwards a REG_INC note naming the stack pointer is built.
   NOTE(review): the function header, the declaration of X, the emit
   call and the left-hand side of the note assignment are missing from
   this listing.  */
4600 x = gen_push_fpul ();
4601 else if (rn == FPSCR_REG)
4602 x = gen_push_fpscr ();
4603 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4604 && FP_OR_XD_REGISTER_P (rn))
4606 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4608 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4610 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4611 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4613 x = gen_push (gen_rtx_REG (SImode, rn));
4617 = gen_rtx_EXPR_LIST (REG_INC,
4618 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4622 /* Output RTL to pop register RN from the stack. */
/* The pop pattern is chosen from the kind of register RN, using the same
   case split as the push code above it:
     - gen_pop_fpul (the test selecting it is on a missing line);
     - gen_pop_fpscr for FPSCR_REG;
     - gen_pop_4 on a DFmode register when TARGET_SH4 and TARGET_FMOVD
       are set, TARGET_FPU_SINGLE is clear, and RN is an FP or XD
       register; odd-numbered FP registers are tested for first, and the
       statement taken in that case is not visible here;
     - gen_pop_e on an SFmode register for FP registers under
       TARGET_SH2E;
     - gen_pop on an SImode register otherwise.
   Afterwards a REG_INC note naming the stack pointer is built.
   NOTE(review): the function header, the declaration of X, the emit
   call and the left-hand side of the note assignment are missing from
   this listing.  */
4630 x = gen_pop_fpul ();
4631 else if (rn == FPSCR_REG)
4632 x = gen_pop_fpscr ();
4633 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4634 && FP_OR_XD_REGISTER_P (rn))
4636 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4638 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4640 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4641 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4643 x = gen_pop (gen_rtx_REG (SImode, rn));
4647 = gen_rtx_EXPR_LIST (REG_INC,
4648 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4651 /* Generate code to push the regs specified in the mask. */
/* MASK points to the HARD_REG_SET of registers to push (it is tested
   with TEST_HARD_REG_BIT (*mask, ...) below).  INTERRUPT_HANDLER is
   nonzero when the current function is an interrupt handler; in that
   case, before the first FP register is reached, FPSCR is loaded via
   fpscr_set_from_mem if any DF_REGS register is in MASK.
   Registers are visited in increasing register number; PR_REG is
   tested separately after the loop so that it is pushed last.
   NOTE(review): the declarations of MASK, I and skip_fpscr, the place
   where skip_fpscr is set, and the push calls themselves are on lines
   missing from this listing.  */
4654 push_regs (mask, interrupt_handler)
4656 int interrupt_handler;
4661 /* Push PR last; this gives better latencies after the prologue, and
4662 candidates for the return delay slot when there are no general
4663 registers pushed. */
4664 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4666 /* If this is an interrupt handler, and the SZ bit varies,
4667 and we have to push any floating point register, we need
4668 to switch to the correct precision first. */
/* The second argument is the address of the DF_REGS entry of
   reg_class_contents.  This listing had the leading "&reg" of that
   expression replaced by a single registered-trademark character,
   which is not valid C; the address-of expression is restored here.  */
4669 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4670 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4672 HARD_REG_SET unsaved;
/* UNSAVED is the complement of MASK, i.e. the registers that are not
   being saved; it is handed to fpscr_set_from_mem.  */
4675 COMPL_HARD_REG_SET(unsaved, *mask);
4676 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4680 && (i != FPSCR_REG || ! skip_fpscr)
4681 && TEST_HARD_REG_BIT (*mask, i))
4684 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4688 /* Work out the registers which need to be saved, both as a mask and a
4689 count of saved words. Return the count.
4691 If doing a pragma interrupt function, then push all regs used by the
4692 function, and if we call another function (we can tell by looking at PR),
4693 make sure that all the regs it clobbers are safe too. */
/* LIVE_REGS_MASK points to the HARD_REG_SET that receives the result; it
   is cleared first and then has one bit set per register to save.
   Side effect: FPU_SINGLE_BIT may be cleared in target_flags, so a
   caller that needs the previous flags must save them beforehand
   (sh_expand_prologue and sh_expand_epilogue copy target_flags into
   save_flags before calling this).
   NOTE(review): the visible accumulation adds GET_MODE_SIZE of each
   saved register's natural mode, so COUNT looks like a size in bytes
   rather than the "count of saved words" mentioned above; the callers
   pass it on to rounded_frame_size, whose parameter is documented as a
   number of bytes.
   NOTE(review): the return type, several declarations, some guarded
   statements and the return statement are missing from this listing.  */
4696 calc_live_regs (live_regs_mask)
4697 HARD_REG_SET *live_regs_mask;
4701 int interrupt_handler;
4704 interrupt_handler = sh_cfun_interrupt_handler_p ();
/* NOTE(review): the loop body below does not depend on COUNT, so every
   iteration clears the same whole set; a single CLEAR_HARD_REG_SET
   looks sufficient -- confirm before simplifying.  */
4706 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4707 CLEAR_HARD_REG_SET (*live_regs_mask);
4708 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4709 && regs_ever_live[FPSCR_REG])
4710 target_flags &= ~FPU_SINGLE_BIT;
4711 /* If we can save a lot of saves by switching to double mode, do that. */
4712 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4713 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4714 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4715 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4718 target_flags &= ~FPU_SINGLE_BIT;
4721 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4722 knows how to use it. That means the pseudo originally allocated for
4723 the initial value can become the PR_MEDIA_REG hard register, as seen for
4724 execute/20010122-1.c:test9. */
4726 pr_live = regs_ever_live[PR_MEDIA_REG];
4729 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4730 pr_live = (pr_initial
4731 ? (GET_CODE (pr_initial) != REG
4732 || REGNO (pr_initial) != (PR_REG))
4733 : regs_ever_live[PR_REG]);
4735 /* Force PR to be live if the prologue has to call the SHmedia
4736 argument decoder or register saver. */
4737 if (TARGET_SHCOMPACT
4738 && ((current_function_args_info.call_cookie
4739 & ~ CALL_COOKIE_RET_TRAMP (1))
4740 || current_function_has_nonlocal_label))
/* Main scan, from the highest hard register number down to 0.  The
   condition selects a register in one of three ways: the return-address
   register (its arm is on a missing line), the interrupt-handler rule,
   or the ordinary "used and not call-clobbered" rule.  */
4742 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4744 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4746 : (interrupt_handler && ! pragma_trapa)
4747 ? (/* Need to save all the regs ever live. */
4748 (regs_ever_live[reg]
4749 || (call_used_regs[reg]
4750 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4752 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4753 && reg != RETURN_ADDRESS_POINTER_REGNUM
4754 && reg != T_REG && reg != GBR_REG
4755 /* Push fpscr only on targets which have FPU */
4756 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4757 : (/* Only push those regs which are used and need to be saved. */
4760 && current_function_args_info.call_cookie
4761 && reg == PIC_OFFSET_TABLE_REGNUM)
4762 || (regs_ever_live[reg] && ! call_used_regs[reg])
4763 || (current_function_calls_eh_return
4764 && (reg == EH_RETURN_DATA_REGNO (0)
4765 || reg == EH_RETURN_DATA_REGNO (1)
4766 || reg == EH_RETURN_DATA_REGNO (2)
4767 || reg == EH_RETURN_DATA_REGNO (3)))))
4769 SET_HARD_REG_BIT (*live_regs_mask, reg);
4770 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4772 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4773 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4775 if (FP_REGISTER_P (reg))
/* When not in single-precision mode, the other half of the even/odd
   FP register pair (reg ^ 1) is added to the mask as well if it was
   not already live.  */
4777 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4779 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4780 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4783 else if (XD_REGISTER_P (reg))
4785 /* Must switch to double mode to access these registers. */
4786 target_flags &= ~FPU_SINGLE_BIT;
4795 /* Code to generate prologue and epilogue sequences */
4797 /* PUSHED is the number of bytes that are being pushed on the
4798 stack for register saves. Return the frame size, padded
4799 appropriately so that the stack stays properly aligned. */
/* The result is chosen so that (result + PUSHED) is get_frame_size () +
   PUSHED rounded up to a multiple of STACK_BOUNDARY / BITS_PER_UNIT.
   For instance, if that alignment were 8, a frame size of 10 with
   PUSHED == 4 would give ((10 + 4 + 7) & -8) - 4 = 12.
   NOTE(review): the declaration of PUSHED and the function's braces are
   on lines missing from this listing.  */
4800 static HOST_WIDE_INT
4801 rounded_frame_size (pushed)
4804 HOST_WIDE_INT size = get_frame_size ();
4805 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4807 return ((size + pushed + align - 1) & -align) - pushed;
4810 /* Choose a call-clobbered target-branch register that remains
4811 unchanged along the whole function. We set it up as the return
4812 value in the prologue. */
/* Visible logic: non-leaf functions are tested for first; then the
   target registers from FIRST_TARGET_REG to LAST_TARGET_REG are scanned
   for one that is call-used and never live.  The scan starts one
   register later when compiling PIC code in which the PIC register is
   live (tr0_used is then 1).
   NOTE(review): the return type, the declarations, and every return
   statement (for the non-leaf case, for a match, and for no match) are
   on lines missing from this listing, so the returned values are not
   documented here.  sh_expand_prologue stores the result in an int
   named tr.  */
4814 sh_media_register_for_return ()
4819 if (! current_function_is_leaf)
4822 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4824 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4825 if (call_used_regs[regno] && ! regs_ever_live[regno])
/* Emit the RTL for the function prologue.  Steps visible in this
   listing, in order:
     1. record whether this is an interrupt handler in
        current_function_interrupt;
     2. adjust the stack for pretend args and for
        current_function_args_info.stack_regs (8 bytes each);
     3. SHcompact: keep incoming argument registers alive and place the
        call cookie in MACH (via R0); SHmedia: copy PR_MEDIA_REG into
        the register chosen by sh_media_register_for_return;
     4. for stdarg functions, push the argument registers;
     5. optionally switch stacks (gen_sp_switch_1);
     6. compute the registers to save with calc_live_regs, then save
        them -- either with explicit stores addressed through the stack
        pointer or R0 (the two-pass loop), or with push_regs;
     7. initialize the PIC register when it is live, marking the insns
        REG_MAYBE_DEAD;
     8. call the SHmedia register-save helper when
        SHMEDIA_REGS_STACK_ADJUST () is nonzero;
     9. restore target_flags, allocate the local frame, set up the
        frame pointer if needed, and call the SHcompact
        incoming-argument decoder.
   calc_live_regs may change target_flags; the value on entry is kept in
   save_flags and put back in step 9, with gen_toggle_sz emitted around
   the register saves when the flags differ and this is not an
   interrupt handler.
   NOTE(review): many lines of this function are missing from this
   listing, including the conditions that choose between the store loop
   and push_regs and the test guarding the stack switch; check a
   complete copy before relying on the exact nesting.  */
4832 sh_expand_prologue ()
4834 HARD_REG_SET live_regs_mask;
4837 int save_flags = target_flags;
4839 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4841 /* We have pretend args if we had an object sent partially in registers
4842 and partially on the stack, e.g. a large structure. */
4843 output_stack_adjust (-current_function_pretend_args_size
4844 - current_function_args_info.stack_regs * 8,
4845 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4849 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4850 /* We're going to use the PIC register to load the address of the
4851 incoming-argument decoder and/or of the return trampoline from
4852 the GOT, so make sure the PIC register is preserved and
4854 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4856 if (TARGET_SHCOMPACT
4857 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4861 /* First, make all registers with incoming arguments that will
4862 be pushed onto the stack live, so that register renaming
4863 doesn't overwrite them. */
4864 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4865 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4866 >= NPARM_REGS (SImode) - reg)
4867 for (; reg < NPARM_REGS (SImode); reg++)
4868 emit_insn (gen_shcompact_preserve_incoming_args
4869 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4870 else if (CALL_COOKIE_INT_REG_GET
4871 (current_function_args_info.call_cookie, reg) == 1)
4872 emit_insn (gen_shcompact_preserve_incoming_args
4873 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4875 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
/* The call cookie is loaded into R0 first and then copied into MACH.  */
4877 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4878 GEN_INT (current_function_args_info.call_cookie));
4879 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4880 gen_rtx_REG (SImode, R0_REG));
4882 else if (TARGET_SHMEDIA)
4884 int tr = sh_media_register_for_return ();
4888 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4889 gen_rtx_REG (DImode, PR_MEDIA_REG));
4891 /* If this function only exits with sibcalls, this copy
4892 will be flagged as dead. */
4893 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4899 /* Emit the code for SETUP_VARARGS. */
4900 if (current_function_stdarg)
4902 /* This is not used by the SH2E calling convention */
4903 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4905 /* Push arg regs as if they'd been provided by caller in stack. */
4906 for (i = 0; i < NPARM_REGS(SImode); i++)
4908 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4911 if (i >= (NPARM_REGS(SImode)
4912 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4916 RTX_FRAME_RELATED_P (insn) = 0;
4922 /* If we're supposed to switch stacks at function entry, do so now. */
4924 emit_insn (gen_sp_switch_1 ());
/* D receives the value returned by calc_live_regs, which the code below
   uses as the size of the register save area.  The call may also clear
   FPU_SINGLE_BIT in target_flags, which is what the comparison with
   save_flags detects.  */
4926 d = calc_live_regs (&live_regs_mask);
4927 /* ??? Maybe we could save some switching if we can move a mode switch
4928 that already happens to be at the function start into the prologue. */
4929 if (target_flags != save_flags && ! current_function_interrupt)
4930 emit_insn (gen_toggle_sz ());
4937 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4938 int offset_in_r0 = -1;
/* D_ROUNDING pads D up to a multiple of the stack alignment, and the
   whole padded save area is allocated by one adjustment.  */
4941 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4942 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4943 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4945 offset = d + d_rounding;
4946 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4948 /* We loop twice: first, we save 8-byte aligned registers in the
4949 higher addresses, that are known to be aligned. Then, we
4950 proceed to saving 32-bit registers that don't need 8-byte
4952 /* Note that if you change this code in a way that affects where
4953 the return register is saved, you have to update not only
4954 sh_expand_epilogue, but also sh_set_return_address. */
4955 for (align = 1; align >= 0; align--)
4956 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4957 if (TEST_HARD_REG_BIT (live_regs_mask, i))
4959 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4961 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4963 if (mode == SFmode && (i % 2) == 1
4964 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4965 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
4972 /* If we're doing the aligned pass and this is not aligned,
4973 or we're doing the unaligned pass and this is aligned,
4975 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4979 offset -= GET_MODE_SIZE (mode);
4981 reg_rtx = gen_rtx_REG (mode, reg);
4983 mem_rtx = gen_rtx_MEM (mode,
4984 gen_rtx_PLUS (Pmode,
4988 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4994 if (HAVE_PRE_DECREMENT
4995 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4996 || mem_rtx == NULL_RTX
4997 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4999 pre_dec = gen_rtx_MEM (mode,
5000 gen_rtx_PRE_DEC (Pmode, r0));
5002 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5011 offset += GET_MODE_SIZE (mode);
5015 if (mem_rtx != NULL_RTX)
5018 if (offset_in_r0 == -1)
5020 emit_move_insn (r0, GEN_INT (offset));
5021 offset_in_r0 = offset;
5023 else if (offset != offset_in_r0)
5028 GEN_INT (offset - offset_in_r0)));
5029 offset_in_r0 += offset - offset_in_r0;
5032 if (pre_dec != NULL_RTX)
5038 (Pmode, r0, stack_pointer_rtx));
5042 offset -= GET_MODE_SIZE (mode);
5043 offset_in_r0 -= GET_MODE_SIZE (mode);
5048 mem_rtx = gen_rtx_MEM (mode, r0);
5050 mem_rtx = gen_rtx_MEM (mode,
5051 gen_rtx_PLUS (Pmode,
5055 /* We must not use an r0-based address for target-branch
5056 registers or for special registers without pre-dec
5057 memory addresses, since we store their values in r0
5059 if (TARGET_REGISTER_P (i)
5060 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5061 && mem_rtx != pre_dec))
5065 if (TARGET_REGISTER_P (i)
5066 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5067 && mem_rtx != pre_dec))
5069 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5071 emit_move_insn (r0mode, reg_rtx);
5079 emit_move_insn (mem_rtx, reg_rtx);
5082 if (offset != d_rounding)
5086 push_regs (&live_regs_mask, current_function_interrupt);
5088 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5090 rtx insn = get_last_insn ();
5091 rtx last = emit_insn (gen_GOTaddr2picreg ());
5093 /* Mark these insns as possibly dead. Sometimes, flow2 may
5094 delete all uses of the PIC register. In this case, let it
5095 delete the initialization too. */
5098 insn = NEXT_INSN (insn);
5100 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5104 while (insn != last);
5107 if (SHMEDIA_REGS_STACK_ADJUST ())
5109 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5110 gen_rtx_SYMBOL_REF (Pmode,
5112 ? "__GCC_push_shmedia_regs"
5113 : "__GCC_push_shmedia_regs_nofpu"));
5114 /* This must NOT go through the PLT, otherwise mach and macl
5115 may be clobbered. */
5116 emit_insn (gen_shmedia_save_restore_regs_compact
5117 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5120 if (target_flags != save_flags && ! current_function_interrupt)
5122 rtx insn = emit_insn (gen_toggle_sz ());
5124 /* If we're lucky, a mode switch in the function body will
5125 overwrite fpscr, turning this insn dead. Tell flow this
5126 insn is ok to delete. */
5127 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5132 target_flags = save_flags;
/* Allocate the local frame.  D_ROUNDING is added back here, presumably
   because those padding bytes were already allocated together with the
   register save area above; sh_expand_epilogue computes its frame size
   with the matching "- d_rounding".  */
5134 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5135 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5137 if (frame_pointer_needed)
5138 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5140 if (TARGET_SHCOMPACT
5141 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5143 /* This must NOT go through the PLT, otherwise mach and macl
5144 may be clobbered. */
5145 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5146 gen_rtx_SYMBOL_REF (Pmode,
5147 "__GCC_shcompact_incoming_args"));
5148 emit_insn (gen_shcompact_incoming_args ());
5153 sh_expand_epilogue ()
5155 HARD_REG_SET live_regs_mask;
5159 int save_flags = target_flags;
5161 int fpscr_deferred = 0;
5163 d = calc_live_regs (&live_regs_mask);
5165 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5166 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5167 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5169 frame_size = rounded_frame_size (d) - d_rounding;
5171 if (frame_pointer_needed)
5173 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5175 /* We must avoid moving the stack pointer adjustment past code
5176 which reads from the local frame, else an interrupt could
5177 occur after the SP adjustment and clobber data in the local
5179 emit_insn (gen_blockage ());
5180 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5182 else if (frame_size)
5184 /* We must avoid moving the stack pointer adjustment past code
5185 which reads from the local frame, else an interrupt could
5186 occur after the SP adjustment and clobber data in the local
5188 emit_insn (gen_blockage ());
5189 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5192 if (SHMEDIA_REGS_STACK_ADJUST ())
5194 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5195 gen_rtx_SYMBOL_REF (Pmode,
5197 ? "__GCC_pop_shmedia_regs"
5198 : "__GCC_pop_shmedia_regs_nofpu"));
5199 /* This must NOT go through the PLT, otherwise mach and macl
5200 may be clobbered. */
5201 emit_insn (gen_shmedia_save_restore_regs_compact
5202 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5205 /* Pop all the registers. */
5207 if (target_flags != save_flags && ! current_function_interrupt)
5208 emit_insn (gen_toggle_sz ());
5211 int offset = d_rounding;
5212 int offset_in_r0 = -1;
5215 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5216 int tmp_regno = R20_REG;
5218 /* We loop twice: first, we save 8-byte aligned registers in the
5219 higher addresses, that are known to be aligned. Then, we
5220 proceed to saving 32-bit registers that don't need 8-byte
5222 for (align = 0; align <= 1; align++)
5223 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5224 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5226 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5228 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5230 if (mode == SFmode && (i % 2) == 0
5231 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5232 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5238 /* If we're doing the aligned pass and this is not aligned,
5239 or we're doing the unaligned pass and this is aligned,
5241 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5245 reg_rtx = gen_rtx_REG (mode, reg);
5247 mem_rtx = gen_rtx_MEM (mode,
5248 gen_rtx_PLUS (Pmode,
5252 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5258 if (HAVE_POST_INCREMENT
5259 && (offset == offset_in_r0
5260 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5261 && mem_rtx == NULL_RTX)
5262 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5264 post_inc = gen_rtx_MEM (mode,
5265 gen_rtx_POST_INC (Pmode, r0));
5267 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5270 post_inc = NULL_RTX;
5279 if (mem_rtx != NULL_RTX)
5282 if (offset_in_r0 == -1)
5284 emit_move_insn (r0, GEN_INT (offset));
5285 offset_in_r0 = offset;
5287 else if (offset != offset_in_r0)
5292 GEN_INT (offset - offset_in_r0)));
5293 offset_in_r0 += offset - offset_in_r0;
5296 if (post_inc != NULL_RTX)
5302 (Pmode, r0, stack_pointer_rtx));
5308 offset_in_r0 += GET_MODE_SIZE (mode);
5311 mem_rtx = gen_rtx_MEM (mode, r0);
5313 mem_rtx = gen_rtx_MEM (mode,
5314 gen_rtx_PLUS (Pmode,
5318 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5319 && mem_rtx != post_inc)
5323 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5324 && mem_rtx != post_inc)
5326 insn = emit_move_insn (r0, mem_rtx);
5329 else if (TARGET_REGISTER_P (i))
5331 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5333 /* Give the scheduler a bit of freedom by using R20..R23
5334 in a round-robin fashion. Don't use R1 here because
5335 we want to use it for EH_RETURN_STACKADJ_RTX. */
5336 insn = emit_move_insn (tmp_reg, mem_rtx);
5338 if (++tmp_regno > R23_REG)
5339 tmp_regno = R20_REG;
5342 insn = emit_move_insn (reg_rtx, mem_rtx);
5344 offset += GET_MODE_SIZE (mode);
5347 if (offset != d + d_rounding)
5354 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5356 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5358 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5360 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5361 && hard_regs_intersect_p (&live_regs_mask,
5362 &reg_class_contents[DF_REGS]))
5364 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5366 if (j == FIRST_FP_REG && fpscr_deferred)
5370 if (target_flags != save_flags && ! current_function_interrupt)
5371 emit_insn (gen_toggle_sz ());
5372 target_flags = save_flags;
5374 output_stack_adjust (extra_push + current_function_pretend_args_size
5376 + current_function_args_info.stack_regs * 8,
5377 stack_pointer_rtx, 7, emit_insn);
5379 if (current_function_calls_eh_return)
5380 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5381 EH_RETURN_STACKADJ_RTX));
5383 /* Switch back to the normal stack if necessary. */
5385 emit_insn (gen_sp_switch_2 ());
5387 /* Tell flow the insn that pops PR isn't dead. */
5388 /* PR_REG will never be live in SHmedia mode, and we don't need to
5389 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5390 by the return pattern. */
5391 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5392 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5395 static int sh_need_epilogue_known = 0;
5400 if (! sh_need_epilogue_known)
5405 sh_expand_epilogue ();
5406 epilogue = get_insns ();
5408 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5410 return sh_need_epilogue_known > 0;
5413 /* Emit code to change the current function's return address to RA.
5414 TEMP is available as a scratch register, if needed. */
5417 sh_set_return_address (ra, tmp)
5420 HARD_REG_SET live_regs_mask;
5423 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5426 d = calc_live_regs (&live_regs_mask);
5428 /* If pr_reg isn't live, we can set it (or the register given in
5429 sh_media_register_for_return) directly. */
5430 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5436 int rr_regno = sh_media_register_for_return ();
5441 rr = gen_rtx_REG (DImode, rr_regno);
5444 rr = gen_rtx_REG (SImode, pr_reg);
5446 emit_insn (GEN_MOV (rr, ra));
5447 /* Tell flow the register for return isn't dead. */
5448 emit_insn (gen_rtx_USE (VOIDmode, rr));
5458 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5459 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5460 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5464 /* We loop twice: first, we save 8-byte aligned registers in the
5465 higher addresses, that are known to be aligned. Then, we
5466 proceed to saving 32-bit registers that don't need 8-byte
5468 for (align = 0; align <= 1; align++)
5469 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5470 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5472 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5474 if (mode == SFmode && (i % 2) == 0
5475 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5476 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5482 /* If we're doing the aligned pass and this is not aligned,
5483 or we're doing the unaligned pass and this is aligned,
5485 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5492 offset += GET_MODE_SIZE (mode);
5495 /* We can't find pr register. */
5499 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5500 + SHMEDIA_REGS_STACK_ADJUST ());
5503 pr_offset = rounded_frame_size (d) - d_rounding;
5505 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5506 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5508 tmp = gen_rtx_MEM (Pmode, tmp);
5509 emit_insn (GEN_MOV (tmp, ra));
5512 /* Clear variables at function end. */
5515 sh_output_function_epilogue (file, size)
5516 FILE *file ATTRIBUTE_UNUSED;
5517 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5519 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5520 sh_need_epilogue_known = 0;
5521 sp_switch = NULL_RTX;
5525 sh_builtin_saveregs ()
5527 /* First unnamed integer register. */
5528 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5529 /* Number of integer registers we need to save. */
5530 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5531 /* First unnamed SFmode float reg */
5532 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5533 /* Number of SFmode float regs to save. */
5534 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5537 HOST_WIDE_INT alias_set;
5543 int pushregs = n_intregs;
5545 while (pushregs < NPARM_REGS (SImode) - 1
5546 && (CALL_COOKIE_INT_REG_GET
5547 (current_function_args_info.call_cookie,
5548 NPARM_REGS (SImode) - pushregs)
5551 current_function_args_info.call_cookie
5552 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5557 if (pushregs == NPARM_REGS (SImode))
5558 current_function_args_info.call_cookie
5559 |= (CALL_COOKIE_INT_REG (0, 1)
5560 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5562 current_function_args_info.call_cookie
5563 |= CALL_COOKIE_STACKSEQ (pushregs);
5565 current_function_pretend_args_size += 8 * n_intregs;
5567 if (TARGET_SHCOMPACT)
5571 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5573 error ("__builtin_saveregs not supported by this subtarget");
5580 /* Allocate block of memory for the regs. */
5581 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5582 Or can assign_stack_local accept a 0 SIZE argument? */
5583 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5586 regbuf = gen_rtx_MEM (BLKmode,
5587 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5588 else if (n_floatregs & 1)
5592 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5593 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5594 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5595 regbuf = change_address (regbuf, BLKmode, addr);
5598 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5599 alias_set = get_varargs_alias_set ();
5600 set_mem_alias_set (regbuf, alias_set);
5603 This is optimized to only save the regs that are necessary. Explicitly
5604 named args need not be saved. */
5606 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5607 adjust_address (regbuf, BLKmode,
5608 n_floatregs * UNITS_PER_WORD),
5612 /* Return the address of the regbuf. */
5613 return XEXP (regbuf, 0);
5616 This is optimized to only save the regs that are necessary. Explicitly
5617 named args need not be saved.
5618 We explicitly build a pointer to the buffer because it halves the insn
5619 count when not optimizing (otherwise the pointer is built for each reg
5621 We emit the moves in reverse order so that we can use predecrement. */
5623 fpregs = gen_reg_rtx (Pmode);
5624 emit_move_insn (fpregs, XEXP (regbuf, 0));
5625 emit_insn (gen_addsi3 (fpregs, fpregs,
5626 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5630 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5632 emit_insn (gen_addsi3 (fpregs, fpregs,
5633 GEN_INT (-2 * UNITS_PER_WORD)));
5634 mem = gen_rtx_MEM (DFmode, fpregs);
5635 set_mem_alias_set (mem, alias_set);
5636 emit_move_insn (mem,
5637 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5639 regno = first_floatreg;
5642 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5643 mem = gen_rtx_MEM (SFmode, fpregs);
5644 set_mem_alias_set (mem, alias_set);
5645 emit_move_insn (mem,
5646 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5647 - (TARGET_LITTLE_ENDIAN != 0)));
5651 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5655 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5656 mem = gen_rtx_MEM (SFmode, fpregs);
5657 set_mem_alias_set (mem, alias_set);
5658 emit_move_insn (mem,
5659 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5662 /* Return the address of the regbuf. */
5663 return XEXP (regbuf, 0);
5666 /* Define the `__builtin_va_list' type for the ABI. */
5671 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5674 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5675 return ptr_type_node;
5677 record = make_node (RECORD_TYPE);
5679 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5681 f_next_o_limit = build_decl (FIELD_DECL,
5682 get_identifier ("__va_next_o_limit"),
5684 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5686 f_next_fp_limit = build_decl (FIELD_DECL,
5687 get_identifier ("__va_next_fp_limit"),
5689 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5692 DECL_FIELD_CONTEXT (f_next_o) = record;
5693 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5694 DECL_FIELD_CONTEXT (f_next_fp) = record;
5695 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5696 DECL_FIELD_CONTEXT (f_next_stack) = record;
5698 TYPE_FIELDS (record) = f_next_o;
5699 TREE_CHAIN (f_next_o) = f_next_o_limit;
5700 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5701 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5702 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5704 layout_type (record);
5709 /* Implement `va_start' for varargs and stdarg. */
5712 sh_va_start (valist, nextarg)
5716 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5717 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5723 expand_builtin_saveregs ();
5724 std_expand_builtin_va_start (valist, nextarg);
5728 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5730 std_expand_builtin_va_start (valist, nextarg);
5734 f_next_o = TYPE_FIELDS (va_list_type_node);
5735 f_next_o_limit = TREE_CHAIN (f_next_o);
5736 f_next_fp = TREE_CHAIN (f_next_o_limit);
5737 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5738 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5740 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5741 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5742 valist, f_next_o_limit);
5743 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5744 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5745 valist, f_next_fp_limit);
5746 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5747 valist, f_next_stack);
5749 /* Call __builtin_saveregs. */
5750 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5751 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5752 TREE_SIDE_EFFECTS (t) = 1;
5753 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5755 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5760 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5761 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5762 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5763 TREE_SIDE_EFFECTS (t) = 1;
5764 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5766 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5767 TREE_SIDE_EFFECTS (t) = 1;
5768 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5770 nint = current_function_args_info.arg_count[SH_ARG_INT];
5775 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5776 build_int_2 (UNITS_PER_WORD * nint, 0)));
5777 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5778 TREE_SIDE_EFFECTS (t) = 1;
5779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5781 u = make_tree (ptr_type_node, nextarg);
5782 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5783 TREE_SIDE_EFFECTS (t) = 1;
5784 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5787 /* Implement `va_arg'. */
5790 sh_va_arg (valist, type)
5793 HOST_WIDE_INT size, rsize;
5794 tree tmp, pptr_type_node;
5797 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5799 size = int_size_in_bytes (type);
5800 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5801 pptr_type_node = build_pointer_type (ptr_type_node);
5804 type = build_pointer_type (type);
5806 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5808 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5809 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5811 rtx lab_false, lab_over;
5813 f_next_o = TYPE_FIELDS (va_list_type_node);
5814 f_next_o_limit = TREE_CHAIN (f_next_o);
5815 f_next_fp = TREE_CHAIN (f_next_o_limit);
5816 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5817 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5819 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5820 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5821 valist, f_next_o_limit);
5822 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5824 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5825 valist, f_next_fp_limit);
5826 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5827 valist, f_next_stack);
5831 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5832 || (TREE_CODE (type) == COMPLEX_TYPE
5833 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5838 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5841 addr_rtx = gen_reg_rtx (Pmode);
5842 lab_false = gen_label_rtx ();
5843 lab_over = gen_label_rtx ();
5848 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5849 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5851 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5853 expand_expr (next_fp_limit, NULL_RTX,
5854 Pmode, EXPAND_NORMAL),
5855 GE, const1_rtx, Pmode, 1, lab_false);
5857 if (TYPE_ALIGN (type) > BITS_PER_WORD
5858 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5859 && (n_floatregs & 1)))
5861 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5862 build_int_2 (UNITS_PER_WORD, 0));
5863 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5864 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5865 TREE_SIDE_EFFECTS (tmp) = 1;
5866 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5869 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5870 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5872 emit_move_insn (addr_rtx, r);
5874 emit_jump_insn (gen_jump (lab_over));
5876 emit_label (lab_false);
5878 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5879 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5881 emit_move_insn (addr_rtx, r);
5885 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5886 build_int_2 (rsize, 0));
5888 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5890 expand_expr (next_o_limit, NULL_RTX,
5891 Pmode, EXPAND_NORMAL),
5892 GT, const1_rtx, Pmode, 1, lab_false);
5894 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5895 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5897 emit_move_insn (addr_rtx, r);
5899 emit_jump_insn (gen_jump (lab_over));
5901 emit_label (lab_false);
5903 if (size > 4 && ! TARGET_SH4)
5905 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5906 TREE_SIDE_EFFECTS (tmp) = 1;
5907 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5910 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5911 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5913 emit_move_insn (addr_rtx, r);
5916 emit_label (lab_over);
5918 tmp = make_tree (pptr_type_node, addr_rtx);
5919 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5922 /* ??? In va-sh.h, there had been code to make values larger than
5923 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5925 result = std_expand_builtin_va_arg (valist, type);
5928 #ifdef POINTERS_EXTEND_UNSIGNED
5929 if (GET_MODE (addr) != Pmode)
5930 addr = convert_memory_address (Pmode, result);
5932 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5933 set_mem_alias_set (result, get_varargs_alias_set ());
5935 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5936 argument to the varargs alias set. */
5940 /* Define the offset between two registers, one to be eliminated, and
5941 the other its replacement, at the start of a routine. */
5944 initial_elimination_offset (from, to)
5949 int regs_saved_rounding = 0;
5950 int total_saved_regs_space;
5951 int total_auto_space;
5952 int save_flags = target_flags;
5955 HARD_REG_SET live_regs_mask;
5956 regs_saved = calc_live_regs (&live_regs_mask);
5957 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5958 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5959 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5960 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5962 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5963 copy_flags = target_flags;
5964 target_flags = save_flags;
5966 total_saved_regs_space = regs_saved + regs_saved_rounding;
5968 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5969 return total_saved_regs_space + total_auto_space
5970 + current_function_args_info.byref_regs * 8;
5972 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5973 return total_saved_regs_space + total_auto_space
5974 + current_function_args_info.byref_regs * 8;
5976 /* Initial gap between fp and sp is 0. */
5977 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5980 if (from == RETURN_ADDRESS_POINTER_REGNUM
5981 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5985 int i, n = total_saved_regs_space;
5987 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5989 n += total_auto_space;
5991 /* If it wasn't saved, there's not much we can do. */
5992 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5995 target_flags = copy_flags;
5997 /* We loop twice: first, check 8-byte aligned registers,
5998 that are stored in the higher addresses, that are known
5999 to be aligned. Then, check 32-bit registers that don't
6000 need 8-byte alignment. */
6001 for (align = 1; align >= 0; align--)
6002 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6003 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6005 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6007 if (mode == SFmode && (i % 2) == 1
6008 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6009 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6015 /* If we're doing the aligned pass and this is not aligned,
6016 or we're doing the unaligned pass and this is aligned,
6018 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6022 n -= GET_MODE_SIZE (mode);
6026 target_flags = save_flags;
6034 return total_auto_space;
6040 /* Handle machine specific pragmas to be semi-compatible with Hitachi
6044 sh_pr_interrupt (pfile)
6045 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6047 pragma_interrupt = 1;
6052 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6054 pragma_interrupt = pragma_trapa = 1;
6058 sh_pr_nosave_low_regs (pfile)
6059 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6061 pragma_nosave_low_regs = 1;
6064 /* Generate 'interrupt_handler' attribute for decls */
6067 sh_insert_attributes (node, attributes)
6071 if (! pragma_interrupt
6072 || TREE_CODE (node) != FUNCTION_DECL)
6075 /* We are only interested in declarations. */
6076 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6079 /* Add an 'interrupt_handler' attribute. */
6080 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6085 /* Supported attributes:
6087 interrupt_handler -- specifies this function is an interrupt handler.
6089 sp_switch -- specifies an alternate stack for an interrupt handler
6092 trap_exit -- use a trapa to exit an interrupt function instead of
6093 an rte instruction. */
6095 const struct attribute_spec sh_attribute_table[] =
6097 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6098 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6099 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6100 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6101 { NULL, 0, 0, false, false, false, NULL }
6104 /* Handle an "interrupt_handler" attribute; arguments as in
6105 struct attribute_spec.handler. */
6107 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6110 tree args ATTRIBUTE_UNUSED;
6111 int flags ATTRIBUTE_UNUSED;
6114 if (TREE_CODE (*node) != FUNCTION_DECL)
6116 warning ("`%s' attribute only applies to functions",
6117 IDENTIFIER_POINTER (name));
6118 *no_add_attrs = true;
6120 else if (TARGET_SHCOMPACT)
6122 error ("attribute interrupt_handler is not compatible with -m5-compact");
6123 *no_add_attrs = true;
6129 /* Handle an "sp_switch" attribute; arguments as in
6130 struct attribute_spec.handler. */
6132 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6136 int flags ATTRIBUTE_UNUSED;
6139 if (TREE_CODE (*node) != FUNCTION_DECL)
6141 warning ("`%s' attribute only applies to functions",
6142 IDENTIFIER_POINTER (name));
6143 *no_add_attrs = true;
6145 else if (!pragma_interrupt)
6147 /* The sp_switch attribute only has meaning for interrupt functions. */
6148 warning ("`%s' attribute only applies to interrupt functions",
6149 IDENTIFIER_POINTER (name));
6150 *no_add_attrs = true;
6152 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6154 /* The argument must be a constant string. */
6155 warning ("`%s' attribute argument not a string constant",
6156 IDENTIFIER_POINTER (name));
6157 *no_add_attrs = true;
6161 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6162 TREE_STRING_POINTER (TREE_VALUE (args)));
6168 /* Handle a "trap_exit" attribute; arguments as in
6169 struct attribute_spec.handler. */
6171 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6175 int flags ATTRIBUTE_UNUSED;
6178 if (TREE_CODE (*node) != FUNCTION_DECL)
6180 warning ("`%s' attribute only applies to functions",
6181 IDENTIFIER_POINTER (name));
6182 *no_add_attrs = true;
6184 else if (!pragma_interrupt)
6186 /* The trap_exit attribute only has meaning for interrupt functions. */
6187 warning ("`%s' attribute only applies to interrupt functions",
6188 IDENTIFIER_POINTER (name));
6189 *no_add_attrs = true;
6191 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6193 /* The argument must be a constant integer. */
6194 warning ("`%s' attribute argument not an integer constant",
6195 IDENTIFIER_POINTER (name));
6196 *no_add_attrs = true;
6200 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6207 sh_cfun_interrupt_handler_p ()
6209 return (lookup_attribute ("interrupt_handler",
6210 DECL_ATTRIBUTES (current_function_decl))
6214 /* Predicates used by the templates. */
6216 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6217 Used only in general_movsrc_operand. */
6220 system_reg_operand (op, mode)
6222 enum machine_mode mode ATTRIBUTE_UNUSED;
6234 /* Returns 1 if OP can be source of a simple move operation.
6235 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
6236 invalid as are subregs of system registers. */
6239 general_movsrc_operand (op, mode)
6241 enum machine_mode mode;
6243 if (GET_CODE (op) == MEM)
6245 rtx inside = XEXP (op, 0);
6246 if (GET_CODE (inside) == CONST)
6247 inside = XEXP (inside, 0);
6249 if (GET_CODE (inside) == LABEL_REF)
6252 if (GET_CODE (inside) == PLUS
6253 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6254 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6257 /* Only post inc allowed. */
6258 if (GET_CODE (inside) == PRE_DEC)
6262 if ((mode == QImode || mode == HImode)
6263 && (GET_CODE (op) == SUBREG
6264 && GET_CODE (XEXP (op, 0)) == REG
6265 && system_reg_operand (XEXP (op, 0), mode)))
6268 return general_operand (op, mode);
6271 /* Returns 1 if OP can be a destination of a move.
6272 Same as general_operand, but no post-increment address allowed. */
6275 general_movdst_operand (op, mode)
6277 enum machine_mode mode;
6279 /* Only pre dec allowed. */
6280 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6283 return general_operand (op, mode);
6286 /* Returns 1 if OP is a normal arithmetic register. */
6289 arith_reg_operand (op, mode)
6291 enum machine_mode mode;
6293 if (register_operand (op, mode))
6297 if (GET_CODE (op) == REG)
6299 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6300 regno = REGNO (SUBREG_REG (op));
6304 return (regno != T_REG && regno != PR_REG
6305 && ! TARGET_REGISTER_P (regno)
6306 && (regno != FPUL_REG || TARGET_SH4)
6307 && regno != MACH_REG && regno != MACL_REG);
6312 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6313 because this would lead to missing sign extensions when truncating from
6314 DImode to SImode. */
6316 arith_reg_dest (op, mode)
6318 enum machine_mode mode;
6320 if (mode == DImode && GET_CODE (op) == SUBREG
6321 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6323 return arith_reg_operand (op, mode);
6327 int_gpr_dest (op, mode)
6329 enum machine_mode mode ATTRIBUTE_UNUSED;
6331 enum machine_mode op_mode = GET_MODE (op);
6333 if (GET_MODE_CLASS (op_mode) != MODE_INT
6334 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6336 if (! reload_completed)
6338 return true_regnum (op) <= LAST_GENERAL_REG;
6342 fp_arith_reg_operand (op, mode)
6344 enum machine_mode mode;
6346 if (register_operand (op, mode))
6350 if (GET_CODE (op) == REG)
6352 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6353 regno = REGNO (SUBREG_REG (op));
6357 return (regno >= FIRST_PSEUDO_REGISTER
6358 || FP_REGISTER_P (regno));
6363 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6366 arith_operand (op, mode)
6368 enum machine_mode mode;
6370 if (arith_reg_operand (op, mode))
6375 /* FIXME: We should be checking whether the CONST_INT fits in a
6376 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6377 attempting to transform a sequence of two 64-bit sets of the
6378 same register from literal constants into a set and an add,
6379 when the difference is too wide for an add. */
6380 if (GET_CODE (op) == CONST_INT
6381 || EXTRA_CONSTRAINT_S (op))
6386 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6392 /* Returns 1 if OP is a valid source operand for a compare insn. */
6395 arith_reg_or_0_operand (op, mode)
6397 enum machine_mode mode;
6399 if (arith_reg_operand (op, mode))
6402 if (EXTRA_CONSTRAINT_U (op))
6408 /* Return 1 if OP is a valid source operand for an SHmedia operation
6409 that takes either a register or a 6-bit immediate. */
6412 shmedia_6bit_operand (op, mode)
6414 enum machine_mode mode;
6416 return (arith_reg_operand (op, mode)
6417 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6420 /* Returns 1 if OP is a valid source operand for a logical operation. */
6423 logical_operand (op, mode)
6425 enum machine_mode mode;
6427 if (arith_reg_operand (op, mode))
6432 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6437 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
6444 and_operand (op, mode)
6446 enum machine_mode mode;
6448 if (logical_operand (op, mode))
6451 /* Check mshflo.l / mshflhi.l opportunities. */
6454 && GET_CODE (op) == CONST_INT
6455 && (INTVAL (op) == (unsigned) 0xffffffff
6456 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6462 /* Nonzero if OP is a floating point value with value 0.0. */
6465 fp_zero_operand (op)
6470 if (GET_MODE (op) != SFmode)
6473 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6474 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6477 /* Nonzero if OP is a floating point value with value 1.0. */
6485 if (GET_MODE (op) != SFmode)
6488 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6489 return REAL_VALUES_EQUAL (r, dconst1);
6492 /* For -m4 and -m4-single-only, mode switching is used. If we are
6493 compiling without -mfmovd, movsf_ie isn't taken into account for
6494 mode switching. We could check in machine_dependent_reorg for
6495 cases where we know we are in single precision mode, but there is
6496 no interface to find that out during reload, so we must avoid
6497 choosing an fldi alternative during reload and thus failing to
6498 allocate a scratch register for the constant loading. */
6502 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6506 tertiary_reload_operand (op, mode)
6508 enum machine_mode mode ATTRIBUTE_UNUSED;
6510 enum rtx_code code = GET_CODE (op);
6511 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6515 fpscr_operand (op, mode)
6517 enum machine_mode mode ATTRIBUTE_UNUSED;
6519 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6520 && GET_MODE (op) == PSImode);
6524 fpul_operand (op, mode)
6526 enum machine_mode mode;
6529 return fp_arith_reg_operand (op, mode);
6531 return (GET_CODE (op) == REG
6532 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6533 && GET_MODE (op) == mode);
6537 symbol_ref_operand (op, mode)
6539 enum machine_mode mode ATTRIBUTE_UNUSED;
6541 return (GET_CODE (op) == SYMBOL_REF);
6544 /* Return the TLS type for TLS symbols, 0 for otherwise. */
6546 tls_symbolic_operand (op, mode)
6548 enum machine_mode mode ATTRIBUTE_UNUSED;
6550 if (GET_CODE (op) != SYMBOL_REF)
6552 return SYMBOL_REF_TLS_MODEL (op);
6556 commutative_float_operator (op, mode)
6558 enum machine_mode mode;
6560 if (GET_MODE (op) != mode)
6562 switch (GET_CODE (op))
6574 noncommutative_float_operator (op, mode)
6576 enum machine_mode mode;
6578 if (GET_MODE (op) != mode)
6580 switch (GET_CODE (op))
6592 unary_float_operator (op, mode)
6594 enum machine_mode mode;
6596 if (GET_MODE (op) != mode)
6598 switch (GET_CODE (op))
6611 binary_float_operator (op, mode)
6613 enum machine_mode mode;
6615 if (GET_MODE (op) != mode)
6617 switch (GET_CODE (op))
6631 binary_logical_operator (op, mode)
6633 enum machine_mode mode;
6635 if (GET_MODE (op) != mode)
6637 switch (GET_CODE (op))
6650 equality_comparison_operator (op, mode)
6652 enum machine_mode mode;
6654 return ((mode == VOIDmode || GET_MODE (op) == mode)
6655 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6658 int greater_comparison_operator (op, mode)
6660 enum machine_mode mode;
6662 if (mode != VOIDmode && GET_MODE (op) == mode)
6664 switch (GET_CODE (op))
6676 int less_comparison_operator (op, mode)
6678 enum machine_mode mode;
6680 if (mode != VOIDmode && GET_MODE (op) == mode)
6682 switch (GET_CODE (op))
6694 /* Accept pseudos and branch target registers. */
6696 target_reg_operand (op, mode)
6698 enum machine_mode mode;
6701 || GET_MODE (op) != DImode)
6704 if (GET_CODE (op) == SUBREG)
6707 if (GET_CODE (op) != REG)
6710 /* We must protect ourselves from matching pseudos that are virtual
6711 registers, because they will eventually be replaced with hardware
6712 registers that aren't branch-target registers. */
6713 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6714 || TARGET_REGISTER_P (REGNO (op)))
6720 /* Same as target_reg_operand, except that label_refs and symbol_refs
6721 are accepted before reload. */
6723 target_operand (op, mode)
6725 enum machine_mode mode;
6730 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6731 && EXTRA_CONSTRAINT_T (op))
6732 return ! reload_completed;
6734 return target_reg_operand (op, mode);
6738 mextr_bit_offset (op, mode)
6740 enum machine_mode mode ATTRIBUTE_UNUSED;
6744 if (GET_CODE (op) != CONST_INT)
6747 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6751 extend_reg_operand (op, mode)
6753 enum machine_mode mode;
6755 return (GET_CODE (op) == TRUNCATE
6757 : arith_reg_operand) (op, mode);
6761 trunc_hi_operand (op, mode)
6763 enum machine_mode mode;
6765 enum machine_mode op_mode = GET_MODE (op);
6767 if (op_mode != SImode && op_mode != DImode
6768 && op_mode != V4HImode && op_mode != V2SImode)
6770 return extend_reg_operand (op, mode);
6774 extend_reg_or_0_operand (op, mode)
6776 enum machine_mode mode;
6778 return (GET_CODE (op) == TRUNCATE
6780 : arith_reg_or_0_operand) (op, mode);
6784 general_extend_operand (op, mode)
6786 enum machine_mode mode;
6788 return (GET_CODE (op) == TRUNCATE
6790 : nonimmediate_operand) (op, mode);
6794 inqhi_operand (op, mode)
6796 enum machine_mode mode;
6798 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6801 /* Can't use true_regnum here because copy_cost wants to know about
6802 SECONDARY_INPUT_RELOAD_CLASS. */
6803 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6807 sh_rep_vec (v, mode)
6809 enum machine_mode mode;
6814 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6815 || (GET_MODE (v) != mode && mode != VOIDmode))
6817 i = XVECLEN (v, 0) - 2;
6818 x = XVECEXP (v, 0, i + 1);
6819 if (GET_MODE_UNIT_SIZE (mode) == 1)
6821 y = XVECEXP (v, 0, i);
6822 for (i -= 2 ; i >= 0; i -= 2)
6823 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6824 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6829 if (XVECEXP (v, 0, i) != x)
6834 /* Determine if V is a constant vector matching MODE with only one element
6835 that is not a sign extension. Two byte-sized elements count as one. */
6837 sh_1el_vec (v, mode)
6839 enum machine_mode mode;
6842 int i, last, least, sign_ix;
/* V must be a CONST_VECTOR; its mode must equal MODE unless MODE is
   VOIDmode.  */
6845 if (GET_CODE (v) != CONST_VECTOR
6846 || (GET_MODE (v) != mode && mode != VOIDmode))
6848 /* Determine numbers of last and of least significant elements. */
6849 last = XVECLEN (v, 0) - 1;
6850 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6851 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
/* For byte-sized units the element next to the least significant one
   supplies the sign.
   NOTE(review): the unit size is taken from MODE here but from
   GET_MODE (v) a few lines below, and MODE is allowed to be VOIDmode by
   the check above -- confirm the two agree for VOIDmode callers.  */
6854 if (GET_MODE_UNIT_SIZE (mode) == 1)
6855 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6856 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
/* SIGN is constm1_rtx when shifting the sign element right by one less
   than its width in bits leaves a nonzero value, otherwise const0_rtx.  */
6858 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6859 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6860 ? constm1_rtx : const0_rtx);
6861 i = XVECLEN (v, 0) - 1;
/* Every element other than LEAST and SIGN_IX is compared against SIGN by
   pointer identity.  */
6863 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6870 sh_const_vec (v, mode)
6872 enum machine_mode mode;
6876 if (GET_CODE (v) != CONST_VECTOR
6877 || (GET_MODE (v) != mode && mode != VOIDmode))
6879 i = XVECLEN (v, 0) - 1;
6881 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6886 /* Return the destination address of a branch.
   BRANCH is read as an insn whose pattern is a SET.  The value returned is
   the INSN_ADDRESSES entry for the uid of the referenced insn.  */
6889 branch_dest (branch)
6892 rtx dest = SET_SRC (PATTERN (branch));
/* For an IF_THEN_ELSE source, follow operand 1 of the IF_THEN_ELSE.  */
6895 if (GET_CODE (dest) == IF_THEN_ELSE)
6896 dest = XEXP (dest, 1);
/* Operand 0 of the remaining expression is the insn whose address is
   wanted (presumably the CODE_LABEL inside a LABEL_REF -- TODO confirm).  */
6897 dest = XEXP (dest, 0);
6898 dest_uid = INSN_UID (dest);
6899 return INSN_ADDRESSES (dest_uid);
6902 /* Return nonzero if REG is not used after INSN.
6903 We assume REG is a reload reg, and therefore does
6904 not live past labels. It may live past calls or jumps though. */
6906 reg_unused_after (reg, insn)
6913 /* If the reg is set by this instruction, then it is safe for our
6914 case. Disregard the case where this is a store to memory, since
6915 we are checking a register used in the store address. */
6916 set = single_set (insn);
6917 if (set && GET_CODE (SET_DEST (set)) != MEM
6918 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
/* Otherwise examine the following insns one at a time.  */
6921 while ((insn = NEXT_INSN (insn)))
6923 code = GET_CODE (insn);
6926 /* If this is a label that existed before reload, then the register
6927 is dead here. However, if this is a label added by reorg, then
6928 the register may still be live here. We can't tell the difference,
6929 so we just ignore labels completely. */
6930 if (code == CODE_LABEL)
6935 if (code == JUMP_INSN)
6938 /* If this is a sequence, we must handle them all at once.
6939 We could have for instance a call that sets the target register,
6940 and an insn in a delay slot that uses the register. In this case,
6941 we must return 0. */
6942 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
/* Inspect every member of the SEQUENCE: calls, jumps (including annulled
   branches), and uses or sets of REG are each tested.  */
6947 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6949 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6950 rtx set = single_set (this_insn);
6952 if (GET_CODE (this_insn) == CALL_INSN)
6954 else if (GET_CODE (this_insn) == JUMP_INSN)
6956 if (INSN_ANNULLED_BRANCH_P (this_insn))
6961 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6963 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6965 if (GET_CODE (SET_DEST (set)) != MEM)
6971 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6976 else if (code == JUMP_INSN)
6979 else if (GET_RTX_CLASS (code) == 'i')
/* An ordinary insn: test its source for a use of REG first, then its
   destination.  When the destination mentions REG the answer is nonzero
   only if that destination is not a MEM.  */
6981 rtx set = single_set (insn);
6983 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6985 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6986 return GET_CODE (SET_DEST (set)) != MEM;
6987 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
/* A call insn is checked against call_used_regs for the register number
   of REG.  */
6991 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6999 static GTY(()) rtx fpscr_rtx;
7005 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7006 REG_USERVAR_P (fpscr_rtx) = 1;
7007 mark_user_reg (fpscr_rtx);
7009 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7010 mark_user_reg (fpscr_rtx);
7029 expand_sf_unop (fun, operands)
7030 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7033 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7037 expand_sf_binop (fun, operands)
7038 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7041 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7046 expand_df_unop (fun, operands)
7047 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7050 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7054 expand_df_binop (fun, operands)
7055 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7058 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7062 /* ??? gcc does flow analysis strictly after common subexpression
7063 elimination. As a result, common subexpression elimination fails
7064 when there are some intervening statements setting the same register.
7065 If we did nothing about this, this would hurt the precision switching
7066 for SH4 badly. There is some cse after reload, but it is unable to
7067 undo the extra register pressure from the unused instructions, and
7068 it cannot remove auto-increment loads.
7070 A C code example that shows this flow/cse weakness for (at least) SH
7071 and sparc (as of gcc ss-970706) is this:
7085 So we add another pass before common subexpression elimination, to
7086 remove assignments that are dead due to a following assignment in the
7087 same basic block. */
7090 mark_use (x, reg_set_block)
7091 rtx x, *reg_set_block;
7097 code = GET_CODE (x);
7102 int regno = REGNO (x);
7103 int nregs = (regno < FIRST_PSEUDO_REGISTER
7104 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7108 reg_set_block[regno + nregs - 1] = 0;
7115 rtx dest = SET_DEST (x);
7117 if (GET_CODE (dest) == SUBREG)
7118 dest = SUBREG_REG (dest);
7119 if (GET_CODE (dest) != REG)
7120 mark_use (dest, reg_set_block);
7121 mark_use (SET_SRC (x), reg_set_block);
7128 const char *fmt = GET_RTX_FORMAT (code);
7130 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7133 mark_use (XEXP (x, i), reg_set_block);
7134 else if (fmt[i] == 'E')
7135 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7136 mark_use (XVECEXP (x, i, j), reg_set_block);
7143 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7145 /* This function returns a register to use to load the address to load
7146 the fpscr from. Currently it always returns r1 or r7, but when we are
7147 able to use pseudo registers after combine, or have a better mechanism
7148 for choosing a register, it should be done here. */
7149 /* REGS_LIVE is the liveness information for the point for which we
7150 need this allocation. In some bare-bones exit blocks, r1 is live at the
7151 start. We can even have all of r0..r3 being live:
7152 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7153 The new insns that are placed before INSN will clobber the register
7154 we return. If a basic block consists only of setting the return value
7155 register to a pseudo and using that register, the return value is not
7156 live before or after this block, yet we'll insert our insns right in
7160 get_free_reg (regs_live)
7161 HARD_REG_SET regs_live;
7163 if (! TEST_HARD_REG_BIT (regs_live, 1))
7164 return gen_rtx_REG (Pmode, 1);
7166 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7167 there shouldn't be anything but a jump before the function end. */
7168 if (! TEST_HARD_REG_BIT (regs_live, 7))
7169 return gen_rtx_REG (Pmode, 7);
7174 /* This function will set the fpscr from memory.
7175 MODE is the mode we are setting it to. */
7177 fpscr_set_from_mem (mode, regs_live)
7179 HARD_REG_SET regs_live;
7181 enum attr_fp_mode fp_mode = mode;
7182 rtx addr_reg = get_free_reg (regs_live);
7184 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7185 emit_insn (gen_fpu_switch1 (addr_reg));
7187 emit_insn (gen_fpu_switch0 (addr_reg));
7190 /* Is the given character a logical line separator for the assembler? */
7191 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7192 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
/* Examine INSN for conditions that call for a length adjustment; the
   adjustment values themselves are on lines not visible in this excerpt.
   Three situations are tested in turn: an insn that needs a delay slot but
   is not part of a SEQUENCE, an SH2E conditional branch that is not part
   of a SEQUENCE, and assembler text that is scanned for sh-dsp
   parallel-processing and repeat mnemonics.  */
7196 sh_insn_length_adjustment (insn)
7199 /* Instructions with unfilled delay slots take up an extra two bytes for
7200 the nop in the delay slot. */
7201 if (((GET_CODE (insn) == INSN
7202 && GET_CODE (PATTERN (insn)) != USE
7203 && GET_CODE (PATTERN (insn)) != CLOBBER)
7204 || GET_CODE (insn) == CALL_INSN
7205 || (GET_CODE (insn) == JUMP_INSN
7206 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7207 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7208 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7209 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7212 /* SH2e has a bug that prevents the use of annulled branches, so if
7213 the delay slot is not filled, we'll have to put a NOP in it. */
7214 if (sh_cpu == CPU_SH2E
7215 && GET_CODE (insn) == JUMP_INSN
7216 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7217 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7218 && get_attr_type (insn) == TYPE_CBRANCH
7219 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7222 /* sh-dsp parallel processing insn take four bytes instead of two. */
7224 if (GET_CODE (insn) == INSN)
7227 rtx body = PATTERN (insn);
7228 const char *template;
7230 int maybe_label = 1;
/* Obtain the assembler template: directly from an ASM_INPUT, or through
   decode_asm_operands for an asm that has operands.  */
7232 if (GET_CODE (body) == ASM_INPUT)
7233 template = XSTR (body, 0);
7234 else if (asm_noperands (body) >= 0)
7236 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7245 while (c == ' ' || c == '\t');
7246 /* all sh-dsp parallel-processing insns start with p.
7247 The only non-ppi sh insn starting with p is pref.
7248 The only ppi starting with pr is prnd. */
7249 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7251 /* The repeat pseudo-insn expands to three insns, a total of
7252 six bytes in size. */
7253 else if ((c == 'r' || c == 'R')
7254 && ! strncasecmp ("epeat", template, 5))
7256 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7258 /* If this is a label, it is obviously not a ppi insn. */
7259 if (c == ':' && maybe_label)
7264 else if (c == '\'' || c == '"')
7269 maybe_label = c != ':';
7277 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7278 isn't protected by a PIC unspec.  A PC rtx is tested together with
   SYMBOL_REF and LABEL_REF.  The search is recursive over the operands
   of X.  */
7280 nonpic_symbol_mentioned_p (x)
7283 register const char *fmt;
7286 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7287 || GET_CODE (x) == PC)
7290 /* We don't want to look into the possible MEM location of a
7291 CONST_DOUBLE, since we're not going to use it, in general. */
7292 if (GET_CODE (x) == CONST_DOUBLE)
/* These are the unspec kinds that are recognized as PIC wrappers.  */
7295 if (GET_CODE (x) == UNSPEC
7296 && (XINT (x, 1) == UNSPEC_PIC
7297 || XINT (x, 1) == UNSPEC_GOT
7298 || XINT (x, 1) == UNSPEC_GOTOFF
7299 || XINT (x, 1) == UNSPEC_GOTPLT
7300 || XINT (x, 1) == UNSPEC_GOTTPOFF
7301 || XINT (x, 1) == UNSPEC_DTPOFF
7302 || XINT (x, 1) == UNSPEC_PLT))
/* Walk the operands from last to first, recursing into the elements of
   vector operands and into expression operands.  */
7305 fmt = GET_RTX_FORMAT (GET_CODE (x));
7306 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7312 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7313 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7316 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7323 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7324 @GOTOFF in `reg'. */
7326 legitimize_pic_address (orig, mode, reg)
7328 enum machine_mode mode ATTRIBUTE_UNUSED;
7331 if (tls_symbolic_operand (orig, Pmode))
7334 if (GET_CODE (orig) == LABEL_REF
7335 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7338 reg = gen_reg_rtx (Pmode);
7340 emit_insn (gen_symGOTOFF2reg (reg, orig));
7343 else if (GET_CODE (orig) == SYMBOL_REF)
7346 reg = gen_reg_rtx (Pmode);
7348 emit_insn (gen_symGOT2reg (reg, orig));
7354 /* Mark the use of a constant in the literal table. If the constant
7355 has multiple labels, make it unique. */
7357 mark_constant_pool_use (x)
7360 rtx insn, lab, pattern;
7365 switch (GET_CODE (x))
7375 /* Get the first label in the list of labels for the same constant
7376 and delete the other labels in the list. */
/* Walk backward from X while each preceding insn is a CODE_LABEL whose
   LABEL_REFS field points at the insn that follows it.  */
7378 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7380 if (GET_CODE (insn) != CODE_LABEL
7381 || LABEL_REFS (insn) != NEXT_INSN (insn))
/* Flag every insn on the LABEL_REFS chain of LAB as deleted.  */
7386 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7387 INSN_DELETED_P (insn) = 1;
7389 /* Mark constants in a window. */
7390 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7392 if (GET_CODE (insn) != INSN)
7395 pattern = PATTERN (insn);
7396 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7399 switch (XINT (pattern, 1))
7401 case UNSPECV_CONST2:
7402 case UNSPECV_CONST4:
7403 case UNSPECV_CONST8:
/* Record the use by storing const1_rtx in element 1 of the unspec
   vector.  */
7404 XVECEXP (pattern, 0, 1) = const1_rtx;
7406 case UNSPECV_WINDOW_END:
/* Element 0 of a window-end unspec is compared against X.  */
7407 if (XVECEXP (pattern, 0, 0) == x)
7410 case UNSPECV_CONST_END:
7420 /* Return true if it's possible to redirect BRANCH1 to the destination
7421 of an unconditional jump BRANCH2. We only want to do this if the
7422 resulting branch will have a short displacement. */
7424 sh_can_redirect_branch (branch1, branch2)
/* Only attempted with flag_expensive_optimizations set and when BRANCH2
   is a simple jump.  */
7428 if (flag_expensive_optimizations && simplejump_p (branch2))
7430 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
/* Backward scan: accumulate get_attr_length of each insn visited and stop
   once the running total reaches 256.
   NOTE(review): this scan is initialized with NEXT_INSN (branch1) but
   steps with PREV_INSN, so the insn after BRANCH1 and BRANCH1 itself are
   counted before any earlier insn -- confirm the starting point is
   intended.  */
7434 for (distance = 0, insn = NEXT_INSN (branch1);
7435 insn && distance < 256;
7436 insn = PREV_INSN (insn))
7441 distance += get_attr_length (insn);
/* Forward scan with the same 256 limit.  */
7443 for (distance = 0, insn = NEXT_INSN (branch1);
7444 insn && distance < 256;
7445 insn = NEXT_INSN (insn))
7450 distance += get_attr_length (insn);
7456 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7458 sh_hard_regno_rename_ok (old_reg, new_reg)
7459 unsigned int old_reg ATTRIBUTE_UNUSED;
7460 unsigned int new_reg;
7463 /* Interrupt functions can only use registers that have already been
7464 saved by the prologue, even if they would normally be
7467 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7473 /* Function to update the integer COST
7474 based on the relationship between INSN that is dependent on
7475 DEP_INSN through the dependence LINK. The default is to make no
7476 adjustment to COST. This can be used for example to specify to
7477 the scheduler that an output- or anti-dependence does not incur
7478 the same cost as a data-dependence. The return value should be
7479 the new value for COST. */
7481 sh_adjust_cost (insn, link, dep_insn, cost)
7483 rtx link ATTRIBUTE_UNUSED;
7491 /* On SHmedia, if the dependence is an anti-dependence or
7492 output-dependence, there is no cost. */
7493 if (REG_NOTE_KIND (link) != 0)
7496 if (get_attr_is_mac_media (insn)
7497 && get_attr_is_mac_media (dep_insn))
7500 else if (REG_NOTE_KIND (link) == 0)
7502 enum attr_type dep_type, type;
7504 if (recog_memoized (insn) < 0
7505 || recog_memoized (dep_insn) < 0)
7508 dep_type = get_attr_type (dep_insn);
7509 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7511 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7512 && (type = get_attr_type (insn)) != TYPE_CALL
7513 && type != TYPE_SFUNC)
7516 /* The only input for a call that is timing-critical is the
7517 function's address. */
7518 if (GET_CODE(insn) == CALL_INSN)
7520 rtx call = PATTERN (insn);
7522 if (GET_CODE (call) == PARALLEL)
7523 call = XVECEXP (call, 0 ,0);
7524 if (GET_CODE (call) == SET)
7525 call = SET_SRC (call);
7526 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7527 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7530 /* Likewise, the most timing critical input for an sfunc call
7531 is the function address. However, sfuncs typically start
7532 using their arguments pretty quickly.
7533 Assume a four cycle delay before they are needed. */
7534 /* All sfunc calls are parallels with at least four components.
7535 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7536 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7537 && XVECLEN (PATTERN (insn), 0) >= 4
7538 && (reg = sfunc_uses_reg (insn)))
7540 if (! reg_set_p (reg, dep_insn))
7543 /* When the preceding instruction loads the shift amount of
7544 the following SHAD/SHLD, the latency of the load is increased
7547 && get_attr_type (insn) == TYPE_DYN_SHIFT
7548 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7549 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7550 XEXP (SET_SRC (single_set(insn)),
7553 /* When an LS group instruction with a latency of less than
7554 3 cycles is followed by a double-precision floating-point
7555 instruction, FIPR, or FTRV, the latency of the first
7556 instruction is increased to 3 cycles. */
7558 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7559 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7561 /* The lsw register of a double-precision computation is ready one
7563 else if (reload_completed
7564 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7565 && (use_pat = single_set (insn))
7566 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7570 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7571 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7574 /* An anti-dependence penalty of two applies if the first insn is a double
7575 precision fadd / fsub / fmul. */
7576 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7577 && recog_memoized (dep_insn) >= 0
7578 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7579 /* A lot of alleged anti-flow dependences are fake,
7580 so check this one is real. */
7581 && flow_dependent_p (dep_insn, insn))
7588 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7589 if DEP_INSN is anti-flow dependent on INSN.
   Returns nonzero when TMP, which starts out as the pattern of INSN, is
   NULL_RTX after note_stores has run flow_dependent_p_1 over the pattern
   of DEP_INSN.  Presumably the callback clears it on a match; that line
   is not visible in this excerpt -- TODO confirm.  */
7591 flow_dependent_p (insn, dep_insn)
7594 rtx tmp = PATTERN (insn);
7596 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7597 return tmp == NULL_RTX;
7600 /* A helper function for flow_dependent_p called through note_stores.
   X is the location being stored to, PAT is unused, and DATA points to an
   rtx holding the pattern to be searched.  */
7602 flow_dependent_p_1 (x, pat, data)
7604 rtx pat ATTRIBUTE_UNUSED;
7607 rtx * pinsn = (rtx *) data;
/* Act only while the pattern pointer is still set and X is referenced
   somewhere in that pattern.  */
7609 if (*pinsn && reg_referenced_p (x, *pinsn))
7613 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7614 'special function' patterns (type sfunc) that clobber pr, but that
7615 do not look like function calls to leaf_function_p. Hence we must
7616 do this extra check. */
7620 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7623 /* This function returns nonzero if the DFA based scheduler interface
7624 is to be used. At present this is supported for the SH4 only. */
7626 sh_use_dfa_interface()
7628 if (TARGET_HARD_SH4)
7634 /* This function returns "2" to indicate dual issue for the SH4
7635 processor. To be used by the DFA pipeline description. */
7639 if (TARGET_SUPERSCALAR)
7645 /* SHmedia requires registers for branches, so we can't generate new
7646 branches past reload. */
7648 sh_cannot_modify_jumps_p ()
7650 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7654 sh_ms_bitfield_layout_p (record_type)
7655 tree record_type ATTRIBUTE_UNUSED;
7661 On the SH1..SH4, the trampoline looks like
7662 2 0002 D202 mov.l l2,r2
7663 1 0000 D301 mov.l l1,r3
7666 5 0008 00000000 l1: .long area
7667 6 000c 00000000 l2: .long function
7669 SH5 (compact) uses r1 instead of r3 for the static chain. */
7672 /* Emit RTL insns to initialize the variable parts of a trampoline.
7673 FNADDR is an RTX for the address of the function's pure code.
7674 CXT is an RTX for the static chain value for the function. */
7677 sh_initialize_trampoline (tramp, fnaddr, cxt)
7678 rtx tramp, fnaddr, cxt;
7680 if (TARGET_SHMEDIA64)
7685 rtx movi1 = GEN_INT (0xcc000010);
7686 rtx shori1 = GEN_INT (0xc8000010);
7689 /* The following trampoline works within a +- 128 KB range for cxt:
7690 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7691 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7692 gettr tr1,r1; blink tr0,r63 */
7693 /* Address rounding makes it hard to compute the exact bounds of the
7694 offset for this trampoline, but we have a rather generous offset
7695 range, so frame_offset should do fine as an upper bound. */
7696 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7698 /* ??? could optimize this trampoline initialization
7699 by writing DImode words with two insns each. */
7700 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7701 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7702 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7703 insn = gen_rtx_AND (DImode, insn, mask);
7704 /* Or in ptb/u .,tr1 pattern */
7705 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7706 insn = force_operand (insn, NULL_RTX);
7707 insn = gen_lowpart (SImode, insn);
7708 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7709 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7710 insn = gen_rtx_AND (DImode, insn, mask);
7711 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7712 insn = gen_lowpart (SImode, insn);
7713 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7714 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7715 insn = gen_rtx_AND (DImode, insn, mask);
7716 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7717 insn = gen_lowpart (SImode, insn);
7718 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7719 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7720 insn = gen_rtx_AND (DImode, insn, mask);
7721 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7722 insn = gen_lowpart (SImode, insn);
7723 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7725 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7726 insn = gen_rtx_AND (DImode, insn, mask);
7727 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7728 insn = gen_lowpart (SImode, insn);
7729 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7731 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7732 GEN_INT (0x6bf10600));
7733 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7734 GEN_INT (0x4415fc10));
7735 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7736 GEN_INT (0x4401fff0));
7737 emit_insn (gen_ic_invalidate_line (tramp));
7740 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
7741 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7743 tramp_templ = gen_datalabel_ref (tramp_templ);
7744 dst = gen_rtx_MEM (BLKmode, tramp);
7745 src = gen_rtx_MEM (BLKmode, tramp_templ);
7746 set_mem_align (dst, 256);
7747 set_mem_align (src, 64);
7748 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7750 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7752 emit_move_insn (gen_rtx_MEM (Pmode,
7753 plus_constant (tramp,
7755 + GET_MODE_SIZE (Pmode))),
7757 emit_insn (gen_ic_invalidate_line (tramp));
7760 else if (TARGET_SHMEDIA)
7762 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7763 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7764 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7765 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7766 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
7767 rotated 10 right, and higher 16 bit of every 32 selected. */
7769 = force_reg (V2HImode, (simplify_gen_subreg
7770 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7771 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7772 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7774 tramp = force_reg (Pmode, tramp);
7775 fnaddr = force_reg (SImode, fnaddr);
7776 cxt = force_reg (SImode, cxt);
7777 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7778 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7780 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7781 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7782 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7783 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7784 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7785 gen_rtx_SUBREG (V2HImode, cxt, 0),
7787 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7788 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7789 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7790 if (TARGET_LITTLE_ENDIAN)
7792 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7793 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7797 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7798 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7800 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7801 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7802 emit_insn (gen_ic_invalidate_line (tramp));
7805 else if (TARGET_SHCOMPACT)
7807 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7810 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7811 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7813 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7814 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7818 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7822 if (TARGET_USERMODE)
7823 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
7824 0, VOIDmode, 1, tramp, SImode);
7826 emit_insn (gen_ic_invalidate_line (tramp));
7830 /* FIXME: This is overly conservative. A SHcompact function that
7831 receives arguments ``by reference'' will have them stored in its
7832 own stack frame, so it must not pass pointers or references to
7833 these arguments to other functions by means of sibling calls. */
7835 sh_function_ok_for_sibcall (decl, exp)
7837 tree exp ATTRIBUTE_UNUSED;
7840 && (! TARGET_SHCOMPACT
7841 || current_function_args_info.stack_regs == 0)
7842 && ! sh_cfun_interrupt_handler_p ());
7845 /* Machine specific built-in functions. */
7847 struct builtin_description
7849 const enum insn_code icode;
7850 const char *const name;
7854 /* Describe number and signedness of arguments; arg[0] == result
7855 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
7856 static const char signature_args[][4] =
7858 #define SH_BLTIN_V2SI2 0
7860 #define SH_BLTIN_V4HI2 1
7862 #define SH_BLTIN_V2SI3 2
7864 #define SH_BLTIN_V4HI3 3
7866 #define SH_BLTIN_V8QI3 4
7868 #define SH_BLTIN_MAC_HISI 5
7870 #define SH_BLTIN_SH_HI 6
7872 #define SH_BLTIN_SH_SI 7
7874 #define SH_BLTIN_V4HI2V2SI 8
7876 #define SH_BLTIN_V4HI2V8QI 9
7878 #define SH_BLTIN_SISF 10
7880 #define SH_BLTIN_LDUA_L 11
7882 #define SH_BLTIN_LDUA_Q 12
7884 #define SH_BLTIN_STUA_L 13
7886 #define SH_BLTIN_STUA_Q 14
7888 #define SH_BLTIN_UDI 15
7890 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7891 #define SH_BLTIN_2 16
7892 #define SH_BLTIN_SU 16
7894 #define SH_BLTIN_3 17
7895 #define SH_BLTIN_SUS 17
7897 #define SH_BLTIN_PSSV 18
7899 #define SH_BLTIN_XXUU 19
7900 #define SH_BLTIN_UUUU 19
7902 #define SH_BLTIN_PV 20
7905 /* mcmv: operands considered unsigned. */
7906 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7907 /* mperm: control value considered unsigned int. */
7908 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7909 /* mshards_q: returns signed short. */
7910 /* nsb: takes long long arg, returns unsigned char. */
7911 static const struct builtin_description bdesc[] =
7913 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7914 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7915 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7916 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7917 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7918 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7919 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7921 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7922 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7924 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7925 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7926 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7927 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7928 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7929 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7930 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7931 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7932 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7933 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7934 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7935 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7936 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7937 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7938 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7939 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7940 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7941 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7942 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7943 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
7944 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7945 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7946 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7947 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7948 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7949 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7950 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7951 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7952 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7953 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7954 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7955 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7956 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7957 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7958 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7959 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7960 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7961 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7962 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7963 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7964 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7965 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7966 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7967 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7968 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7969 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7970 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7971 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7972 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7973 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7974 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7975 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7976 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7977 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7979 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7980 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7981 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7982 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7983 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7984 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7985 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7986 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7987 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7988 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7989 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7990 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7991 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7992 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7993 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7994 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7996 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
7997 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
7999 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8000 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
/* Register every entry of bdesc as a machine-dependent builtin
   (BUILT_IN_MD).  The index of the entry within bdesc (d - bdesc) is
   handed to builtin_function as the function code.  Function types
   built for signatures below SH_BLTIN_NUM_SHARED_SIGNATURES are cached
   in SHARED and reused by later entries that have the same signature.  */
8005 sh_media_init_builtins ()
8007 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8008 const struct builtin_description *d;
/* Start with an empty cache of shared function types.  */
8010 memset (shared, 0, sizeof shared);
8011 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8013 tree type, arg_type;
8014 int signature = d->signature;
/* Reuse a cached type when this signature is shareable and has already
   been built.  */
8017 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8018 type = shared[signature];
/* has_result is 1 when column 0 of the signature row is nonzero; it is
   used below to step insn operand numbers past the result operand.  */
8021 int has_result = signature_args[signature][0] != 0;
/* Compare the mode of the first argument operand with Pmode when the
   first argument column holds code 8.
   NOTE(review): presumably code 8 marks a pointer argument and entries
   whose insn uses a different pointer mode are skipped -- the guarded
   statement is not visible here; confirm.  */
8023 if (signature_args[signature][1] == 8
8024 && (insn_data[d->icode].operand[has_result].mode != Pmode))
/* Test for a floating-point operand 0 when TARGET_FPU_ANY is false
   (the guarded statement is likewise not visible here).  */
8026 if (! TARGET_FPU_ANY
8027 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
/* The argument list starts as void_list_node and has each slot's type
   consed onto its front by the tree_cons call below.  */
8029 type = void_list_node;
8032 int arg = signature_args[signature][i];
/* Signature column i maps to insn operand i - 1 + has_result.  */
8033 int opno = i - 1 + has_result;
/* The type chosen for a slot is one of: a generic pointer, the type the
   language hook returns for the operand's machine mode, or void.  */
8036 arg_type = ptr_type_node;
8038 arg_type = ((*lang_hooks.types.type_for_mode)
8039 (insn_data[d->icode].operand[opno].mode,
8044 arg_type = void_type_node;
8047 type = tree_cons (NULL_TREE, arg_type, type);
/* The arg_type left over from the code above is passed as the first
   argument of build_function_type, with the accumulated list as the
   second.  */
8049 type = build_function_type (arg_type, type);
/* Remember the finished type so that later entries can share it.  */
8050 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8051 shared[signature] = type;
/* Declare the builtin under the name given in the table entry.  */
8053 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8062 sh_media_init_builtins ();
8065 /* Expand an expression EXP that calls a built-in function,
8066 with result going to TARGET if that's convenient
8067 (and in mode MODE if that's convenient).
8068 SUBTARGET may be used as the target for computing one of EXP's operands.
8069 IGNORE is nonzero if the value is to be ignored. */
/* The function code stored in the builtin's decl is used directly as an
   index into bdesc; the selected entry supplies the insn code and the
   row of signature_args that drives the operand handling below.  */
8072 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8075 rtx subtarget ATTRIBUTE_UNUSED;
8076 enum machine_mode mode ATTRIBUTE_UNUSED;
/* EXP's operand 0 is peeled twice to reach the function decl; operand 1
   is the argument list.  */
8079 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8080 tree arglist = TREE_OPERAND (exp, 1);
8081 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8082 const struct builtin_description *d = &bdesc[fcode];
8083 enum insn_code icode = d->icode;
8084 int signature = d->signature;
8085 enum machine_mode tmode = VOIDmode;
/* A nonzero column 0 in the signature row means the insn has a result
   operand; its mode is taken from insn operand 0.  */
8090 if (signature_args[signature][0])
8095 tmode = insn_data[icode].operand[0].mode;
/* TARGET is replaced by a new register of mode tmode when its mode
   differs from tmode or the operand 0 predicate rejects it (the first
   alternative of this condition is not visible here).  */
8097 || GET_MODE (target) != tmode
8098 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8099 target = gen_reg_rtx (tmode);
/* Visit up to three argument columns of the signature; nop is the insn
   operand index and advances together with i.  */
8105 for (i = 1; i <= 3; i++, nop++)
8108 enum machine_mode opmode, argmode;
/* A zero entry in the signature row is tested for at every slot.  */
8110 if (! signature_args[signature][i])
/* Take the next actual argument off the list.  */
8112 arg = TREE_VALUE (arglist);
8113 if (arg == error_mark_node)
8115 arglist = TREE_CHAIN (arglist);
8116 opmode = insn_data[icode].operand[nop].mode;
8117 argmode = TYPE_MODE (TREE_TYPE (arg));
/* When the argument's mode is not the one the insn operand wants, wrap
   the argument in a NOP_EXPR of the type the language hook returns for
   opmode.  */
8118 if (argmode != opmode)
8119 arg = build1 (NOP_EXPR,
8120 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8121 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
/* If the operand predicate rejects the expanded rtx, pass it through
   copy_to_mode_reg for opmode.  */
8122 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8123 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* The insn's generator function is called with one, two, three or four
   operands.
   NOTE(review): the code that selects among these four calls is not
   visible here; presumably it dispatches on nop -- confirm.  */
8129 pat = (*insn_data[d->icode].genfun) (op[0]);
8132 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8135 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8138 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Emit unary operation CODE with source OP1 and destination OP0 as two
   insns produced by gen_unary_sf_op.  Both insns receive the same
   SFmode rtx built from CODE and OP1; the first passes const0_rtx for
   both selector arguments, the second passes const1_rtx for both.
   NOTE(review): presumably each insn handles one SFmode half of a V2SF
   value, as the function name suggests -- confirm against the pattern
   behind gen_unary_sf_op.  */
8150 sh_expand_unop_v2sf (code, op0, op1)
8154 rtx sel0 = const0_rtx;
8155 rtx sel1 = const1_rtx;
8156 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
/* The operation itself, expressed in SFmode.  */
8157 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8159 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8160 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Emit binary operation CODE with sources OP1 and OP2 and destination
   OP0 as two insns produced through the generator pointer fn.  Both
   insns receive the same SFmode rtx built from CODE, OP1 and OP2; the
   selector arguments are (0, 0, 0, 1) for the first insn and
   (1, 1, 1, 0) for the second.
   NOTE(review): the initializer of fn is not visible here -- confirm
   which generator it names.  */
8164 sh_expand_binop_v2sf (code, op0, op1, op2)
8168 rtx sel0 = const0_rtx;
8169 rtx sel1 = const1_rtx;
8170 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
/* The operation itself, expressed in SFmode.  */
8172 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8174 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8175 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8178 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
/* The two visible results are both produced by reg_classes_intersect_p
   and are reached only when FROM and TO differ in size.  */
8181 sh_cannot_change_mode_class (from, to, class)
8182 enum machine_mode from, to;
8183 enum reg_class class;
8185 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8187 if (TARGET_LITTLE_ENDIAN)
/* Little endian: when either mode is narrower than 8 bytes, report
   whether CLASS intersects DF_REGS.  */
8189 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8190 return reg_classes_intersect_p (DF_REGS, class);
/* When FROM is narrower than 8 bytes, report whether CLASS intersects
   DF_HI_REGS.
   NOTE(review): presumably this is the big-endian arm of the test
   above; the else is not visible here -- confirm.  */
8194 if (GET_MODE_SIZE (from) < 8)
8195 return reg_classes_intersect_p (DF_HI_REGS, class);
8202 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8203 that label is used. */
/* ADDRESS is unwrapped first when GOTOFF_P accepts it; in every case
   the count is only touched for a LABEL_REF that wraps a CODE_LABEL.  */
8206 sh_mark_label (address, nuses)
8210 if (GOTOFF_P (address))
8212 /* Extract the label or symbol. */
/* Strip the outer wrapper, then an optional PLUS, then take element 0
   of the vector in operand 0.  */
8213 address = XEXP (address, 0);
8214 if (GET_CODE (address) == PLUS)
8215 address = XEXP (address, 0);
8216 address = XVECEXP (address, 0, 0);
/* Anything other than a LABEL_REF to a CODE_LABEL leaves the use counts
   unchanged.  */
8218 if (GET_CODE (address) == LABEL_REF
8219 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8220 LABEL_NUSES (XEXP (address, 0)) += nuses;
8223 /* Compute extra cost of moving data between one register class and another. */
8226 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8227 uses this information. Hence, the general register <-> floating point
8228 register information here is not used for SFmode. */
/* Cost of moving a value of mode MODE from SRCCLASS to DSTCLASS.  The
   class pairs are tested in the order written below.
   NOTE(review): the values returned for several of these tests are not
   visible here; only the three size-scaled returns can be read off
   directly.  */
8231 sh_register_move_cost (mode, srcclass, dstclass)
8232 enum machine_mode mode;
8233 enum reg_class srcclass, dstclass;
/* Destination is T_REGS or PR_REGS.  */
8235 if (dstclass == T_REGS || dstclass == PR_REGS)
/* SImode with an FP register class on both sides, for a non-SHmedia
   target with TARGET_FMOVD.  */
8238 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8239 && REGCLASS_HAS_FP_REG (srcclass)
8240 && REGCLASS_HAS_FP_REG (dstclass))
/* Move between an FP register class and a general register class, in
   either direction: 4 for SHmedia, else 8 with TARGET_FMOVD, else 12,
   multiplied by the size of MODE in 8-byte units, rounded up.  */
8243 if ((REGCLASS_HAS_FP_REG (dstclass)
8244 && REGCLASS_HAS_GENERAL_REG (srcclass))
8245 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8246 && REGCLASS_HAS_FP_REG (srcclass)))
8247 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8248 * ((GET_MODE_SIZE (mode) + 7) / 8U));
/* Move between FPUL_REGS and a general register class, in either
   direction.  */
8250 if ((dstclass == FPUL_REGS
8251 && REGCLASS_HAS_GENERAL_REG (srcclass))
8252 || (srcclass == FPUL_REGS
8253 && REGCLASS_HAS_GENERAL_REG (dstclass)))
/* FPUL_REGS paired with PR_REGS or MAC_REGS in either direction, or
   T_REGS as the source of a move into FPUL_REGS.  */
8256 if ((dstclass == FPUL_REGS
8257 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8258 || (srcclass == FPUL_REGS
8259 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
/* TARGET_REGS on one side and a class without a general register on the
   other.  */
8262 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8263 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
/* Likewise for FPSCR_REGS.  */
8266 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8267 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
/* Neither class has a general register (the leading part of this
   condition is not visible here): 2 per 8-byte unit of MODE, rounded
   up.  */
8272 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8273 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8274 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Final return: 2 per 4-byte unit of MODE, rounded up.  */
8276 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8279 /* Like register_operand, but take into account that SHMEDIA can use
8280 the constant zero like a general register. */
/* OP is the rtx being tested and MODE the mode it is tested in.  */
8282 sh_register_operand (op, mode)
8284 enum machine_mode mode;
/* The zero constant of MODE on an SHmedia target is tested for first.  */
8286 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
/* Every case that reaches this point is decided by the generic
   register_operand predicate.  */
8288 return register_operand (op, mode);
8291 static rtx emit_load_ptr PARAMS ((rtx, rtx));
/* Emit a move into REG of the ptr_mode memory location at ADDR and
   return whatever emit_move_insn returns.  When Pmode and ptr_mode
   differ, the loaded value is sign-extended to Pmode.  */
8294 emit_load_ptr (reg, addr)
8297 rtx mem = gen_rtx_MEM (ptr_mode, addr);
/* Widen the memory reference only when pointers in memory and address
   registers use different modes.  */
8299 if (Pmode != ptr_mode)
8300 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8301 return emit_move_insn (reg, mem);
8305 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8307 tree thunk_fndecl ATTRIBUTE_UNUSED;
8308 HOST_WIDE_INT delta;
8309 HOST_WIDE_INT vcall_offset;
8312 CUMULATIVE_ARGS cum;
8313 int structure_value_byref = 0;
8314 rtx this, this_value, sibcall, insns, funexp;
8315 tree funtype = TREE_TYPE (function);
8317 = (TARGET_SHMEDIA ? CONST_OK_FOR_J (delta) : CONST_OK_FOR_I (delta));
8319 rtx scratch0, scratch1, scratch2;
8321 reload_completed = 1;
8323 current_function_uses_only_leaf_regs = 1;
8325 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8327 /* Find the "this" pointer. We have such a wide range of ABIs for the
8328 SH that it's best to do this completely machine independently.
8329 "this" is passed as first argument, unless a structure return pointer
8330 comes first, in which case "this" comes second. */
8331 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8332 #ifndef PCC_STATIC_STRUCT_RETURN
8333 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8334 structure_value_byref = 1;
8335 #endif /* not PCC_STATIC_STRUCT_RETURN */
8336 if (structure_value_byref && struct_value_rtx == 0)
8338 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8340 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8342 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8344 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8345 static chain pointer (even if you can't have nested virtual functions
8346 right now, someone might implement them sometime), and the rest of the
8347 registers are used for argument passing, are callee-saved, or reserved. */
8348 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
8351 scratch1 = gen_rtx_REG (ptr_mode, 1);
8352 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8353 pointing where to return struct values. */
8354 scratch2 = gen_rtx_REG (Pmode, 3);
8356 else if (TARGET_SHMEDIA)
8358 scratch1 = gen_rtx_REG (ptr_mode, 21);
8359 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
8362 this_value = plus_constant (this, delta);
8364 && (simple_add || scratch0 != scratch1)
8365 && strict_memory_address_p (ptr_mode, this_value))
8367 emit_load_ptr (scratch0, this_value);
8373 else if (simple_add)
8374 emit_move_insn (this, this_value);
8377 emit_move_insn (scratch1, GEN_INT (delta));
8378 emit_insn (gen_add2_insn (this, scratch1));
8386 emit_load_ptr (scratch0, this);
8388 offset_addr = plus_constant (scratch0, vcall_offset);
8389 if (strict_memory_address_p (ptr_mode, offset_addr))
8391 else if (! TARGET_SH5)
8393 /* scratch0 != scratch1, and we have indexed loads. Get better
8394 schedule by loading the offset into r1 and using an indexed
8395 load - then the load of r1 can issue before the load from
8396 (this + delta) finishes. */
8397 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8398 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
8400 else if (TARGET_SHMEDIA
8401 ? CONST_OK_FOR_J (vcall_offset)
8402 : CONST_OK_FOR_I (vcall_offset))
8404 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8405 offset_addr = scratch0;
8407 else if (scratch0 != scratch1)
8409 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8410 emit_insn (gen_add2_insn (scratch0, scratch1));
8411 offset_addr = scratch0;
8414 abort (); /* FIXME */
8415 emit_load_ptr (scratch0, offset_addr);
8417 if (Pmode != ptr_mode)
8418 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8419 emit_insn (gen_add2_insn (this, scratch0));
8422 /* Generate a tail call to the target function. */
8423 if (! TREE_USED (function))
8425 assemble_external (function);
8426 TREE_USED (function) = 1;
8428 funexp = XEXP (DECL_RTL (function), 0);
8429 emit_move_insn (scratch2, funexp);
8430 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8431 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8432 SIBLING_CALL_P (sibcall) = 1;
8433 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8436 /* Run just enough of rest_of_compilation to do scheduling and get
8437 the insns emitted. Note that use_thunk calls
8438 assemble_start_function and assemble_end_function. */
8439 insns = get_insns ();
8441 if (optimize > 0 && flag_schedule_insns_after_reload)
8444 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8445 life_analysis (insns, rtl_dump_file, PROP_FINAL);
8447 split_all_insns (1);
8449 schedule_insns (rtl_dump_file);
8452 MACHINE_DEPENDENT_REORG (insns);
8454 if (optimize > 0 && flag_delayed_branch)
8455 dbr_schedule (insns, rtl_dump_file);
8456 shorten_branches (insns);
8457 final_start_function (insns, file, 1);
8458 final (insns, file, 1, 0);
8459 final_end_function ();
8461 if (optimize > 0 && flag_schedule_insns_after_reload)
8463 /* Release all memory allocated by flow. */
8464 free_basic_block_vars (0);
8466 /* Release all memory held by regsets now. */
8467 regset_release_memory ();
8470 reload_completed = 0;