1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
/* Insn code of indirect_jump_scratch, copied into a plain int --
   presumably so code that does not include insn-codes.h can refer to it;
   confirm against the users of this variable.  */
55 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Subword indices of the most/least significant SImode half of a
   double-word value; they swap with the selected endianness.  */
57 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
58 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
60 /* These are some macros to abstract register modes. */
61 #define CONST_OK_FOR_ADD(size) \
62 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* DImode generators on 64-bit SHmedia, SImode generators otherwise.  */
63 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
64 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
65 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* ---- Per-function interrupt/#pragma state and scheduler globals. ---- */
67 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
68 int current_function_interrupt;
70 /* ??? The pragma interrupt support will not work for SH3. */
71 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
72 output code for the next function appropriate for an interrupt handler. */
75 /* This is set by the trap_exit attribute for functions. It specifies
76 a trap number to be used in a trapa instruction at function exit
77 (instead of an rte instruction). */
80 /* This is used by the sp_switch attribute for functions. It specifies
81 a variable holding the address of the stack the interrupt function
82 should switch to/from at entry/exit. */
85 /* This is set by #pragma trapa, and is similar to the above, except that
86 the compiler doesn't emit code to preserve all registers. */
87 static int pragma_trapa;
89 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
90 which has a separate set of low regs for User and Supervisor modes.
91 This should only be used for the lowest level of interrupts. Higher levels
92 of interrupts must save the registers in case they themselves are
94 int pragma_nosave_low_regs;
/* NOTE(review): pragma_trapa is file-local while pragma_nosave_low_regs and
   current_function_anonymous_args are exported -- presumably referenced from
   the front-end/#pragma machinery; confirm before changing linkage.  */
96 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
97 sh_expand_prologue. */
98 int current_function_anonymous_args;
100 /* Global variables for machine-dependent things. */
102 /* Which cpu are we scheduling for. */
103 enum processor_type sh_cpu;
105 /* Definitions used in ready queue reordering for first scheduling pass. */
107 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
108 static short *regmode_weight[2];
110 /* Total SFmode and SImode weights of scheduled insns. */
111 static int curr_regmode_pressure[2];
113 /* If true, skip cycles for Q -> R movement. */
114 static int skip_cycles = 0;
116 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
117 and returned from sh_reorder2. */
118 static short cached_can_issue_more;
120 /* Saved operands from the last compare to use when we generate an scc
126 /* Provides the class number of the smallest class containing
129 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
/* General registers; the first entry (r0) gets its own class.  */
131 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
/* Floating-point registers; fr0 gets its own class like r0 above.  */
147 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
/* Branch-target registers (SHmedia).  */
163 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
164 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
165 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
166 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
/* Special registers (AP, PR, T, MACH/MACL, FPUL, FPSCR, ...).  */
167 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
168 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
/* Printable register names, sized so OVERRIDE_OPTIONS can rewrite them.  */
172 char sh_register_names[FIRST_PSEUDO_REGISTER] \
173 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
175 char sh_additional_register_names[ADDREGNAMES_SIZE] \
176 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
177 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
179 /* Provide reg_class from a letter such as appears in the machine
180 description. *: target independently reserved letter.
181 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
183 enum reg_class reg_class_from_letter[] =
185 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
186 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
187 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
188 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
189 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
190 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
191 /* y */ FPUL_REGS, /* z */ R0_REGS
/* Selects between assembler syntaxes; tested via ASSEMBLER_DIALECT,
   e.g. in print_operand's '.' case below.  */
194 int assembler_dialect;
196 static bool shmedia_space_reserved_for_target_registers;
/* Forward declarations for the file-local helpers defined later in
   this file: branch/constant-pool reorg, prologue/epilogue emission,
   scheduling hooks, cost functions and ABI hooks.  */
198 static void split_branches (rtx);
199 static int branch_dest (rtx);
200 static void force_into (rtx, rtx);
201 static void print_slot (rtx);
202 static rtx add_constant (rtx, enum machine_mode, rtx);
203 static void dump_table (rtx, rtx);
204 static int hi_const (rtx);
205 static int broken_move (rtx);
206 static int mova_p (rtx);
207 static rtx find_barrier (int, rtx, rtx);
208 static int noncall_uses_reg (rtx, rtx, rtx *);
209 static rtx gen_block_redirect (rtx, int, int);
210 static void sh_reorg (void);
/* Prologue/epilogue and frame-layout helpers.  */
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
212 static rtx frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static void mark_use (rtx, rtx *);
218 static HOST_WIDE_INT rounded_frame_size (int);
219 static rtx mark_constant_pool_use (rtx);
220 const struct attribute_spec sh_attribute_table[];
221 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
222 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
223 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
225 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
226 static void sh_insert_attributes (tree, tree *);
227 static int sh_adjust_cost (rtx, rtx, rtx, int);
228 static int sh_use_dfa_interface (void);
229 static int sh_issue_rate (void);
230 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
/* Register-pressure-aware sched1 helpers (see the hook comment below).  */
231 static short find_set_regmode_weight (rtx, enum machine_mode);
232 static short find_insn_regmode_weight (rtx, enum machine_mode);
233 static void find_regmode_weight (int, enum machine_mode);
234 static void sh_md_init_global (FILE *, int, int);
235 static void sh_md_finish_global (FILE *, int);
236 static int rank_for_reorder (const void *, const void *);
237 static void swap_reorder (rtx *, int);
238 static void ready_reorder (rtx *, int);
239 static short high_pressure (enum machine_mode);
240 static int sh_reorder (FILE *, int, rtx *, int *, int);
241 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
242 static void sh_md_init (FILE *, int, int);
243 static int sh_variable_issue (FILE *, int, rtx, int);
245 static bool sh_function_ok_for_sibcall (tree, tree);
247 static bool sh_cannot_modify_jumps_p (void);
248 static int sh_target_reg_class (void);
249 static bool sh_optimize_target_register_callee_saved (bool);
250 static bool sh_ms_bitfield_layout_p (tree);
252 static void sh_init_builtins (void);
253 static void sh_media_init_builtins (void);
254 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
255 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
256 static void sh_file_start (void);
257 static int flow_dependent_p (rtx, rtx);
258 static void flow_dependent_p_1 (rtx, rtx, void *);
/* rtx cost helpers backing sh_rtx_costs.  */
259 static int shiftcosts (rtx);
260 static int andcosts (rtx);
261 static int addsubcosts (rtx);
262 static int multcosts (rtx);
263 static bool unspec_caller_rtx_p (rtx);
264 static bool sh_cannot_copy_insn_p (rtx);
265 static bool sh_rtx_costs (rtx, int, int, int *);
266 static int sh_address_cost (rtx);
267 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
268 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
269 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
270 static int scavenge_reg (HARD_REG_SET *s);
271 struct save_schedule_s;
272 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
273 struct save_schedule_s *, int);
/* ABI hooks: struct return, varargs and va_list handling.  */
275 static rtx sh_struct_value_rtx (tree, int);
276 static bool sh_return_in_memory (tree, tree);
277 static rtx sh_builtin_saveregs (void);
278 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
279 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
280 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
281 static tree sh_build_builtin_va_list (void);
284 /* Initialize the GCC target structure. */
/* Each #undef/#define pair below overrides a default from target-def.h;
   the result is collected into the targetm vector at the end.  */
285 #undef TARGET_ATTRIBUTE_TABLE
286 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
288 /* The next two are used for debug info when compiling with -gdwarf. */
289 #undef TARGET_ASM_UNALIGNED_HI_OP
290 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
291 #undef TARGET_ASM_UNALIGNED_SI_OP
292 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
294 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
297 #undef TARGET_ASM_ALIGNED_DI_OP
298 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
300 #undef TARGET_ASM_FUNCTION_EPILOGUE
301 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
303 #undef TARGET_ASM_OUTPUT_MI_THUNK
304 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
306 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
309 #undef TARGET_ASM_FILE_START
310 #define TARGET_ASM_FILE_START sh_file_start
311 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
312 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
314 #undef TARGET_INSERT_ATTRIBUTES
315 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
317 #undef TARGET_SCHED_ADJUST_COST
318 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
320 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
321 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
323 #undef TARGET_SCHED_ISSUE_RATE
324 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
326 /* The next 5 hooks have been implemented for reenabling sched1. With the
327 help of these macros we are limiting the movement of insns in sched1 to
328 reduce the register pressure. The overall idea is to keep count of SImode
329 and SFmode regs required by already scheduled insns. When these counts
330 cross some threshold values; give priority to insns that free registers.
331 The insn that frees registers is most likely to be the insn with lowest
332 LUID (original insn order); but such an insn might be there in the stalled
333 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
334 upto a max of 8 cycles so that such insns may move from Q -> R.
336 The description of the hooks are as below:
338 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
339 scheduler; it is called inside the sched_init function just after
340 find_insn_reg_weights function call. It is used to calculate the SImode
341 and SFmode weights of insns of basic blocks; much similar to what
342 find_insn_reg_weights does.
343 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
345 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
346 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
349 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
350 high; reorder the ready queue so that the insn with lowest LUID will be
353 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
354 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
356 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
357 can be returned from TARGET_SCHED_REORDER2.
359 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
361 #undef TARGET_SCHED_DFA_NEW_CYCLE
362 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
364 #undef TARGET_SCHED_INIT_GLOBAL
365 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
367 #undef TARGET_SCHED_FINISH_GLOBAL
368 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
370 #undef TARGET_SCHED_VARIABLE_ISSUE
371 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER sh_reorder
376 #undef TARGET_SCHED_REORDER2
377 #define TARGET_SCHED_REORDER2 sh_reorder2
379 #undef TARGET_SCHED_INIT
380 #define TARGET_SCHED_INIT sh_md_init
382 #undef TARGET_CANNOT_MODIFY_JUMPS_P
383 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
384 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
385 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
386 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
387 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
388 sh_optimize_target_register_callee_saved
390 #undef TARGET_MS_BITFIELD_LAYOUT_P
391 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
393 #undef TARGET_INIT_BUILTINS
394 #define TARGET_INIT_BUILTINS sh_init_builtins
395 #undef TARGET_EXPAND_BUILTIN
396 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
398 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
399 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
401 #undef TARGET_CANNOT_COPY_INSN_P
402 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
403 #undef TARGET_RTX_COSTS
404 #define TARGET_RTX_COSTS sh_rtx_costs
405 #undef TARGET_ADDRESS_COST
406 #define TARGET_ADDRESS_COST sh_address_cost
408 #undef TARGET_MACHINE_DEPENDENT_REORG
409 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
412 #undef TARGET_HAVE_TLS
413 #define TARGET_HAVE_TLS true
416 #undef TARGET_PROMOTE_PROTOTYPES
417 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
418 #undef TARGET_PROMOTE_FUNCTION_ARGS
419 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
420 #undef TARGET_PROMOTE_FUNCTION_RETURN
421 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
423 #undef TARGET_STRUCT_VALUE_RTX
424 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
425 #undef TARGET_RETURN_IN_MEMORY
426 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
428 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
429 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
430 #undef TARGET_SETUP_INCOMING_VARARGS
431 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
432 #undef TARGET_STRICT_ARGUMENT_NAMING
433 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
434 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
435 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_BUILD_BUILTIN_VA_LIST
438 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
440 #undef TARGET_PCH_VALID_P
441 #define TARGET_PCH_VALID_P sh_pch_valid_p
443 /* Return regmode weight for insn. */
/* Index 0 is the SImode table, index 1 the SFmode table (see
   regmode_weight above).  */
444 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
446 /* Return current register pressure for regmode. */
447 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
/* The one global target hook vector, built from the overrides above.  */
449 struct gcc_target targetm = TARGET_INITIALIZER;
451 /* Print the operand address in x to the stream. */
454 print_operand_address (FILE *stream, rtx x)
456 switch (GET_CODE (x))
/* Register indirect: "@Rn".  */
460 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
/* Base-plus-index or base-plus-displacement address.  */
465 rtx base = XEXP (x, 0);
466 rtx index = XEXP (x, 1);
468 switch (GET_CODE (index))
/* Constant displacement: "@(disp,Rn)".  */
471 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
472 reg_names[true_regnum (base)]);
/* Indexed: "@(r0,Rn)".  Printing the larger regno works because the
   other register is presumably r0 -- TODO confirm against the md
   constraints that allow this address form.  */
478 int base_num = true_regnum (base);
479 int index_num = true_regnum (index);
481 fprintf (stream, "@(r0,%s)",
482 reg_names[MAX (base_num, index_num)]);
/* Pre-decrement: "@-Rn".  */
494 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
/* Post-increment: "@Rn+".  */
498 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
/* Symbolic address: record constant-pool use, then print it.  */
502 x = mark_constant_pool_use (x);
503 output_addr_const (stream, x);
508 /* Print operand x (an rtx) in assembler syntax to file stream
509 according to modifier code.
511 '.' print a .s if insn needs delay slot
512 ',' print LOCAL_LABEL_PREFIX
513 '@' print trap, rte or rts depending upon pragma interruptness
514 '#' output a nop if there is nothing to put in the delay slot
515 ''' print likelihood suffix (/u for unlikely).
516 'O' print a constant without the #
517 'R' print the LSW of a dp value - changes if in little endian
518 'S' print the MSW of a dp value - changes if in little endian
519 'T' print the next word of a dp value - same as 'R' in big endian mode.
520 'M' print an `x' if `m' will print `base,index'.
521 'N' print 'r63' if the operand is (const_int 0).
522 'm' print a pair `base,offset' or `base,index', for LD and ST.
523 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
524 'o' output an operator. */
527 print_operand (FILE *stream, rtx x, int code)
/* '.': emit the delay-slot suffix when the slot insn is non-empty
   and not annulled; the dialect chooses "/s" vs ".s" spelling.  */
533 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
534 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
535 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
538 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* '@': function-exit instruction -- trapa for trap_exit functions,
   rte for interrupt handlers, rts otherwise.  */
542 fprintf (stream, "trapa #%d", trap_exit);
543 else if (sh_cfun_interrupt_handler_p ())
544 fprintf (stream, "rte");
546 fprintf (stream, "rts");
549 /* Output a nop if there's nothing in the delay slot. */
550 if (dbr_sequence_length () == 0)
551 fprintf (stream, "\n\tnop");
/* '\'': "/u" suffix when the branch is predicted not taken
   (probability below one half of REG_BR_PROB_BASE).  */
555 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
557 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
558 fputs ("/u", stream);
562 x = mark_constant_pool_use (x);
563 output_addr_const (stream, x);
/* 'R'/'S': least/most significant SImode half of a double-word reg.  */
566 fputs (reg_names[REGNO (x) + LSW], (stream));
569 fputs (reg_names[REGNO (x) + MSW], (stream));
572 /* Next word of a double. */
573 switch (GET_CODE (x))
576 fputs (reg_names[REGNO (x) + 1], (stream));
579 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
580 && GET_CODE (XEXP (x, 0)) != POST_INC)
581 x = adjust_address (x, SImode, 4);
582 print_operand_address (stream, XEXP (x, 0));
/* 'o': spell the rtx operator as its SHmedia mnemonic fragment.  */
589 switch (GET_CODE (x))
591 case PLUS: fputs ("add", stream); break;
592 case MINUS: fputs ("sub", stream); break;
593 case MULT: fputs ("mul", stream); break;
594 case DIV: fputs ("div", stream); break;
595 case EQ: fputs ("eq", stream); break;
596 case NE: fputs ("ne", stream); break;
597 case GT: case LT: fputs ("gt", stream); break;
598 case GE: case LE: fputs ("ge", stream); break;
599 case GTU: case LTU: fputs ("gtu", stream); break;
600 case GEU: case LEU: fputs ("geu", stream); break;
/* 'M': an 'x' when the 'm' case below would print "base,index".  */
606 if (GET_CODE (x) == MEM
607 && GET_CODE (XEXP (x, 0)) == PLUS
608 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
609 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
614 if (GET_CODE (x) != MEM)
617 switch (GET_CODE (x))
621 print_operand (stream, x, 0);
622 fputs (", 0", stream);
626 print_operand (stream, XEXP (x, 0), 0);
627 fputs (", ", stream);
628 print_operand (stream, XEXP (x, 1), 0);
/* 'N': a zero operand prints as r63.  */
637 if (x == CONST0_RTX (GET_MODE (x)))
639 fprintf ((stream), "r63");
/* 'u': low 16 bits of a CONST_INT, unsigned.  */
644 if (GET_CODE (x) == CONST_INT)
646 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
/* Default: print the operand itself.  */
653 switch (GET_CODE (x))
655 /* FIXME: We need this on SHmedia32 because reload generates
656 some sign-extended HI or QI loads into DImode registers
657 but, because Pmode is SImode, the address ends up with a
658 subreg:SI of the DImode register. Maybe reload should be
659 fixed so as to apply alter_subreg to such loads? */
661 if (SUBREG_BYTE (x) != 0
662 || GET_CODE (SUBREG_REG (x)) != REG)
/* Vector/double FP registers print with a mode-specific prefix
   (mtrx/fv/fp/d) derived from the base register name.  */
669 if (FP_REGISTER_P (REGNO (x))
670 && GET_MODE (x) == V16SFmode)
671 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
672 else if (FP_REGISTER_P (REGNO (x))
673 && GET_MODE (x) == V4SFmode)
674 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
675 else if (GET_CODE (x) == REG
676 && GET_MODE (x) == V2SFmode)
677 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
678 else if (FP_REGISTER_P (REGNO (x))
679 && GET_MODE_SIZE (GET_MODE (x)) > 4)
680 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
682 fputs (reg_names[REGNO (x)], (stream));
686 output_address (XEXP (x, 0));
/* Truncated/extended HImode constant: print as an explicit
   "(sym >> n)" or "(sym & 65535)" expression.  */
691 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
692 && GET_MODE (XEXP (x, 0)) == DImode
693 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
694 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
696 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
699 if (GET_CODE (val) == ASHIFTRT)
702 if (GET_CODE (XEXP (val, 0)) == CONST)
704 output_addr_const (stream, XEXP (val, 0));
705 if (GET_CODE (XEXP (val, 0)) == CONST)
707 fputs (" >> ", stream);
708 output_addr_const (stream, XEXP (val, 1));
713 if (GET_CODE (val) == CONST)
715 output_addr_const (stream, val);
716 if (GET_CODE (val) == CONST)
719 fputs (" & 65535)", stream);
727 output_addr_const (stream, x);
734 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
736 force_into (rtx value, rtx target)
/* force_operand may legitimize VALUE into some other register;
   emit an explicit copy into TARGET when it does.  */
738 value = force_operand (value, target);
739 if (! rtx_equal_p (value, target))
740 emit_insn (gen_move_insn (target, value));
743 /* Emit code to perform a block move. Choose the best method.
745 OPERANDS[0] is the destination.
746 OPERANDS[1] is the source.
747 OPERANDS[2] is the size.
748 OPERANDS[3] is the alignment safe to use. */
751 expand_block_move (rtx *operands)
753 int align = INTVAL (operands[3]);
754 int constp = (GET_CODE (operands[2]) == CONST_INT);
755 int bytes = (constp ? INTVAL (operands[2]) : 0);
757 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
758 alignment, or if it isn't a multiple of 4 bytes, then fail. */
759 if (! constp || align < 4 || (bytes % 4 != 0))
/* 12-byte move: call the dedicated __movstrSI12_i4 helper.
   The destination/source addresses are passed in r4/r5.  */
766 else if (bytes == 12)
771 rtx r4 = gen_rtx_REG (SImode, 4);
772 rtx r5 = gen_rtx_REG (SImode, 5);
774 entry_name = get_identifier ("__movstrSI12_i4");
776 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
777 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
778 force_into (XEXP (operands[0], 0), r4);
779 force_into (XEXP (operands[1], 0), r5);
780 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
/* Larger moves (when not optimizing for size): call the looping
   __movstr_i4_odd/_even helper with the dword count minus one in r6.  */
783 else if (! TARGET_SMALLCODE)
789 rtx r4 = gen_rtx_REG (SImode, 4);
790 rtx r5 = gen_rtx_REG (SImode, 5);
791 rtx r6 = gen_rtx_REG (SImode, 6);
793 entry_name = get_identifier (bytes & 4
795 : "__movstr_i4_even");
796 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
797 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
798 force_into (XEXP (operands[0], 0), r4);
799 force_into (XEXP (operands[1], 0), r5);
802 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
803 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
/* Small fixed sizes: call the size-specific __movstrSI<n> helper.  */
815 rtx r4 = gen_rtx_REG (SImode, 4);
816 rtx r5 = gen_rtx_REG (SImode, 5);
818 sprintf (entry, "__movstrSI%d", bytes);
819 entry_name = get_identifier (entry);
820 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
821 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
822 force_into (XEXP (operands[0], 0), r4);
823 force_into (XEXP (operands[1], 0), r5);
824 emit_insn (gen_block_move_real (func_addr_rtx));
828 /* This is the same number of bytes as a memcpy call, but to a different
829 less common function name, so this will occasionally use more space. */
830 if (! TARGET_SMALLCODE)
835 int final_switch, while_loop;
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
838 rtx r6 = gen_rtx_REG (SImode, 6);
840 entry_name = get_identifier ("__movstr");
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
846 /* r6 controls the size of the move. 16 is decremented from it
847 for each 64 bytes moved. Then the negative bit left over is used
848 as an index into a list of move instructions. e.g., a 72 byte move
849 would be set up with size(r6) = 14, for one iteration through the
850 big while loop, and a switch of -2 for the last part. */
852 final_switch = 16 - ((bytes / 4) % 16);
853 while_loop = ((bytes / 4) / 16 - 1) * 16;
854 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
855 emit_insn (gen_block_lump_real (func_addr_rtx));
862 /* Prepare operands for a move define_expand; specifically, one of the
863 operands must be in a register. */
866 prepare_move_operands (rtx operands[], enum machine_mode mode)
/* PIC: legitimize symbolic sources (but not TLS symbols, which are
   handled separately below).  */
868 if ((mode == SImode || mode == DImode)
870 && ! ((mode == Pmode || mode == ptr_mode)
871 && tls_symbolic_operand (operands[1], Pmode) != 0))
874 if (SYMBOLIC_CONST_P (operands[1]))
876 if (GET_CODE (operands[0]) == MEM)
877 operands[1] = force_reg (Pmode, operands[1]);
878 else if (TARGET_SHMEDIA
879 && GET_CODE (operands[1]) == LABEL_REF
880 && target_reg_operand (operands[0], mode))
/* Reuse the destination as scratch when no new pseudos may be made.  */
884 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
885 operands[1] = legitimize_pic_address (operands[1], mode, temp);
/* (const (plus sym off)): legitimize the symbol, then add the offset.  */
888 else if (GET_CODE (operands[1]) == CONST
889 && GET_CODE (XEXP (operands[1], 0)) == PLUS
890 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
892 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
893 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
895 operands[1] = expand_binop (mode, add_optab, temp,
896 XEXP (XEXP (operands[1], 0), 1),
897 no_new_pseudos ? temp
898 : gen_reg_rtx (Pmode),
903 if (! reload_in_progress && ! reload_completed)
905 /* Copy the source to a register if both operands aren't registers. */
906 if (! register_operand (operands[0], mode)
907 && ! sh_register_operand (operands[1], mode))
908 operands[1] = copy_to_mode_reg (mode, operands[1]);
910 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
912 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
913 except that we can't use that function because it is static. */
914 rtx new = change_address (operands[0], mode, 0);
915 MEM_COPY_ATTRIBUTES (new, operands[0]);
919 /* This case can happen while generating code to move the result
920 of a library call to the target. Reject `st r0,@(rX,rY)' because
921 reload will fail to find a spill register for rX, since r0 is already
922 being used for the source. */
923 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
924 && GET_CODE (operands[0]) == MEM
925 && GET_CODE (XEXP (operands[0], 0)) == PLUS
926 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
927 operands[1] = copy_to_mode_reg (mode, operands[1]);
/* TLS references: expand the access sequence for the symbol's model.  */
930 if (mode == Pmode || mode == ptr_mode)
933 enum tls_model tls_kind;
937 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
939 rtx tga_op1, tga_ret, tmp, tmp2;
/* GD: call __tls_get_addr-style helper; result arrives in r0.  */
944 case TLS_MODEL_GLOBAL_DYNAMIC:
945 tga_ret = gen_rtx_REG (Pmode, R0_REG);
946 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
/* LD: get the module base in r0, then add the symbol's DTPOFF.  */
950 case TLS_MODEL_LOCAL_DYNAMIC:
951 tga_ret = gen_rtx_REG (Pmode, R0_REG);
952 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
954 tmp = gen_reg_rtx (Pmode);
955 emit_move_insn (tmp, tga_ret);
957 if (register_operand (op0, Pmode))
960 tmp2 = gen_reg_rtx (Pmode);
962 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
/* IE: load the TP offset through the GOT.  */
966 case TLS_MODEL_INITIAL_EXEC:
968 emit_insn (gen_GOTaddr2picreg ());
969 tga_op1 = gen_reg_rtx (Pmode);
970 tmp = gen_sym2GOTTPOFF (op1);
971 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
/* LE: GBR plus a link-time constant TPOFF.  */
975 case TLS_MODEL_LOCAL_EXEC:
976 tmp2 = gen_reg_rtx (Pmode);
977 emit_insn (gen_load_gbr (tmp2));
978 tmp = gen_reg_rtx (Pmode);
979 emit_insn (gen_symTPOFF2reg (tmp, op1));
980 RTX_UNCHANGING_P (tmp) = 1;
982 if (register_operand (op0, Pmode))
985 op1 = gen_reg_rtx (Pmode);
987 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1000 /* Prepare the operands for an scc instruction; make sure that the
1001 compare has been done. */
1003 prepare_scc_operands (enum rtx_code code)
1005 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1006 enum rtx_code oldcode = code;
1007 enum machine_mode mode;
1009 /* First need a compare insn. */
1013 /* It isn't possible to handle this case. */
/* If the comparison code had to be replaced by its swapped form
   (code != oldcode), swap the saved operands to match.  */
1030 if (code != oldcode)
1032 rtx tmp = sh_compare_op0;
1033 sh_compare_op0 = sh_compare_op1;
1034 sh_compare_op1 = tmp;
1037 mode = GET_MODE (sh_compare_op0);
1038 if (mode == VOIDmode)
1039 mode = GET_MODE (sh_compare_op1);
1041 sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* Operand 1 must also be a register unless this is an EQ/NE compare
   against zero (which the hardware can do directly).  */
1042 if ((code != EQ && code != NE
1043 && (sh_compare_op1 != const0_rtx
1044 || code == GTU || code == GEU || code == LTU || code == LEU))
1045 || (mode == DImode && sh_compare_op1 != const0_rtx)
1046 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1047 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 FP compares also use FPSCR, so wrap the T-reg set in a PARALLEL.  */
1049 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1050 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1051 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1052 gen_rtx_SET (VOIDmode, t_reg,
1053 gen_rtx_fmt_ee (code, SImode,
1054 sh_compare_op0, sh_compare_op1)),
1055 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1057 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1058 gen_rtx_fmt_ee (code, SImode,
1059 sh_compare_op0, sh_compare_op1)));
1064 /* Called from the md file, set up the operands of a compare instruction. */
1067 from_compare (rtx *operands, int code)
1069 enum machine_mode mode = GET_MODE (sh_compare_op0);
1071 if (mode == VOIDmode)
1072 mode = GET_MODE (sh_compare_op1);
1075 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1077 /* Force args into regs, since we can't use constants here. */
1078 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1079 if (sh_compare_op1 != const0_rtx
1080 || code == GTU || code == GEU
1081 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1082 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* GE on SH2E floats is synthesized from a GT compare followed by an
   IEEE-aware EQ compare (recursive call handles the GT part).  */
1084 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1086 from_compare (operands, GT);
1087 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1090 insn = gen_rtx_SET (VOIDmode,
1091 gen_rtx_REG (SImode, T_REG),
1092 gen_rtx_fmt_ee (code, SImode,
1093 sh_compare_op0, sh_compare_op1));
/* SH4 FP compares use FPSCR; see prepare_scc_operands.  */
1094 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1096 insn = gen_rtx_PARALLEL (VOIDmode,
1098 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1099 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
/* Return the assembler template (two mov instructions) for a DImode or
   DFmode move.  Handles: push (MEM with PRE_DEC), reg->reg (ordering the
   two halves so overlapping src/dst registers are not clobbered),
   CONST_INT->reg (sign word via #-1 or #0, low word via mov), and
   MEM->reg for REG/SUBREG/PLUS/LABEL_REF/POST_INC addresses.  For plain
   pointer loads it copies the half not aliased by the pointer first.
   NOTE(review): elided numbered listing -- some statements are missing
   between the embedded line numbers (e.g. the r0+REG punt body).  */
1105 /* Functions to output assembly code. */
1107 /* Return a sequence of instructions to perform DI or DF move.
1109 Since the SH cannot move a DI or DF in one instruction, we have
1110 to take care when we see overlapping source and dest registers. */
1113 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1114 enum machine_mode mode)
1116 rtx dst = operands[0];
1117 rtx src = operands[1];
1119 if (GET_CODE (dst) == MEM
1120 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1121 return "mov.l %T1,%0\n\tmov.l %1,%0";
1123 if (register_operand (dst, mode)
1124 && register_operand (src, mode))
1126 if (REGNO (src) == MACH_REG)
1127 return "sts mach,%S0\n\tsts macl,%R0";
1129 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1130 when mov.d r1,r0 do r1->r0 then r2->r1. */
1132 if (REGNO (src) + 1 == REGNO (dst))
1133 return "mov %T1,%T0\n\tmov %1,%0";
1135 return "mov %1,%0\n\tmov %T1,%T0";
1137 else if (GET_CODE (src) == CONST_INT)
1139 if (INTVAL (src) < 0)
1140 output_asm_insn ("mov #-1,%S0", operands);
1142 output_asm_insn ("mov #0,%S0", operands);
1144 return "mov %1,%R0";
1146 else if (GET_CODE (src) == MEM)
1149 int dreg = REGNO (dst);
1150 rtx inside = XEXP (src, 0);
1152 if (GET_CODE (inside) == REG)
1153 ptrreg = REGNO (inside);
1154 else if (GET_CODE (inside) == SUBREG)
1155 ptrreg = subreg_regno (inside);
1156 else if (GET_CODE (inside) == PLUS)
1158 ptrreg = REGNO (XEXP (inside, 0));
1159 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1160 an offsettable address. Unfortunately, offsettable addresses use
1161 QImode to check the offset, and a QImode offsettable address
1162 requires r0 for the other operand, which is not currently
1163 supported, so we can't use the 'o' constraint.
1164 Thus we must check for and handle r0+REG addresses here.
1165 We punt for now, since this is likely very rare. */
1166 if (GET_CODE (XEXP (inside, 1)) == REG)
1169 else if (GET_CODE (inside) == LABEL_REF)
1170 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1171 else if (GET_CODE (inside) == POST_INC)
1172 return "mov.l %1,%0\n\tmov.l %1,%T0";
1176 /* Work out the safe way to copy. Copy into the second half first. */
1178 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1181 return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Emit the insn that was meant for a delay slot (element 1 of the
   SEQUENCE) directly via final_scan_insn, then mark it deleted so
   final does not emit it a second time.  */
1184 /* Print an instruction which would have gone into a delay slot after
1185 another instruction, but couldn't because the other instruction expanded
1186 into a sequence where putting the slot insn at the end wouldn't work. */
1189 print_slot (rtx insn)
1191 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1193 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output assembly for a jump whose target is out of range of a plain
   branch.  Picks a template (mov.w/mov.l + braf, PIC mova sequence, or
   jmp @reg) by distance, PIC and CPU; uses a scratch register provided
   by a preceding indirect_jump_scratch insn when available, otherwise
   saves/restores r13 around the jump (in MACL on SH5 to keep the stack
   8-byte aligned, on the stack otherwise).  Finally emits the literal:
   ".long %O2-%O0" relative to a braf base label for PIC, else a .long
   or .word constant.
   NOTE(review): elided numbered listing -- branch arms and several
   statements between the embedded line numbers are missing here.  */
1197 output_far_jump (rtx insn, rtx op)
1199 struct { rtx lab, reg, op; } this;
1200 rtx braf_base_lab = NULL_RTX;
1203 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1206 this.lab = gen_label_rtx ();
1210 && offset - get_attr_length (insn) <= 32766)
1213 jump = "mov.w %O0,%1; braf %1";
1221 jump = "mov.l %O0,%1; braf %1";
1223 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1226 jump = "mov.l %O0,%1; jmp @%1";
1228 /* If we have a scratch register available, use it. */
1229 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1230 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1232 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1233 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1234 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1235 output_asm_insn (jump, &this.lab);
1236 if (dbr_sequence_length ())
1237 print_slot (final_sequence);
1239 output_asm_insn ("nop", 0);
1243 /* Output the delay slot insn first if any. */
1244 if (dbr_sequence_length ())
1245 print_slot (final_sequence);
1247 this.reg = gen_rtx_REG (SImode, 13);
1248 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1249 Fortunately, MACL is fixed and call-clobbered, and we never
1250 need its value across jumps, so save r13 in it instead of in
1253 output_asm_insn ("lds r13, macl", 0);
1255 output_asm_insn ("mov.l r13,@-r15", 0);
1256 output_asm_insn (jump, &this.lab);
1258 output_asm_insn ("sts macl, r13", 0);
1260 output_asm_insn ("mov.l @r15+,r13", 0);
1262 if (far && flag_pic && TARGET_SH2)
1264 braf_base_lab = gen_label_rtx ();
1265 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1266 CODE_LABEL_NUMBER (braf_base_lab));
1269 output_asm_insn (".align 2", 0);
1270 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1272 if (far && flag_pic)
1275 this.lab = braf_base_lab;
1276 output_asm_insn (".long %O2-%O0", &this.lab);
1279 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
/* Counter for generating per-function "LF" local labels; starts at 100
   and is consumed by output_branch below.  */
1283 /* Local label counter, used for constants in the pool and inside
1284 pattern branches. */
1286 static int lf = 100;
/* Output an ordinary conditional branch; LOGIC selects bt vs bf.
   Dispatches on get_attr_length (insn): long forms emit an inverted
   short branch around a "bra" (placing any non-annulled delay-slot insn
   in the inverted branch's slot, labelled with an LF local label); the
   medium cases fall back to a short bt/bf when relaxing; the SH2e case
   adds nops to work around the annulled-branch hardware bug.  Falls
   through to an abort-style path for lengths that should no longer occur
   after machine_dependent_reorg.
   NOTE(review): elided numbered listing -- case labels, braces and some
   statements between the embedded line numbers are missing here.  */
1288 /* Output code for ordinary branches. */
1291 output_branch (int logic, rtx insn, rtx *operands)
1293 switch (get_attr_length (insn))
1296 /* This can happen if filling the delay slot has caused a forward
1297 branch to exceed its range (we could reverse it, but only
1298 when we know we won't overextend other branches; this should
1299 best be handled by relaxation).
1300 It can also happen when other condbranches hoist delay slot insn
1301 from their destination, thus leading to code size increase.
1302 But the branch will still be in the range -4092..+4098 bytes. */
1307 /* The call to print_slot will clobber the operands. */
1308 rtx op0 = operands[0];
1310 /* If the instruction in the delay slot is annulled (true), then
1311 there is no delay slot where we can put it now. The only safe
1312 place for it is after the label. final will do that by default. */
1315 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1317 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1318 ASSEMBLER_DIALECT ? "/" : ".", label);
1319 print_slot (final_sequence);
1322 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1324 output_asm_insn ("bra\t%l0", &op0);
1325 fprintf (asm_out_file, "\tnop\n");
1326 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1330 /* When relaxing, handle this like a short branch. The linker
1331 will fix it up if it still doesn't fit after relaxation. */
1333 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1335 /* These are for SH2e, in which we have to account for the
1336 extra nop because of the hardware bug in annulled branches. */
1343 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1345 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1347 ASSEMBLER_DIALECT ? "/" : ".", label);
1348 fprintf (asm_out_file, "\tnop\n");
1349 output_asm_insn ("bra\t%l0", operands);
1350 fprintf (asm_out_file, "\tnop\n");
1351 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1355 /* When relaxing, fall through. */
1360 sprintf (buffer, "b%s%ss\t%%l0",
1362 ASSEMBLER_DIALECT ? "/" : ".");
1363 output_asm_insn (buffer, &operands[0]);
1368 /* There should be no longer branches now - that would
1369 indicate that something has destroyed the branches set
1370 up in machine_dependent_reorg. */
/* Output a "branchy" insn pattern (TEMPLATE) whose %l9 operand is a
   label.  If the next insn is a conditional jump, reuse or synthesize a
   label around it (registering its address with INSN_ADDRESSES_NEW so
   branch shortening stays consistent); otherwise emit a fresh label
   right after INSN.  CODE is compared against the condition of the
   following jump to decide whether that branch is taken.
   NOTE(review): elided numbered listing -- the return statements and
   some branch arms between the embedded line numbers are missing.  */
1376 output_branchy_insn (enum rtx_code code, const char *template,
1377 rtx insn, rtx *operands)
1379 rtx next_insn = NEXT_INSN (insn);
1381 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1383 rtx src = SET_SRC (PATTERN (next_insn));
1384 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1386 /* Following branch not taken */
1387 operands[9] = gen_label_rtx ();
1388 emit_label_after (operands[9], next_insn);
1389 INSN_ADDRESSES_NEW (operands[9],
1390 INSN_ADDRESSES (INSN_UID (next_insn))
1391 + get_attr_length (next_insn));
1396 int offset = (branch_dest (next_insn)
1397 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1398 if (offset >= -252 && offset <= 258)
1400 if (GET_CODE (src) == IF_THEN_ELSE)
1402 src = XEXP (src, 1);
1408 operands[9] = gen_label_rtx ();
1409 emit_label_after (operands[9], insn);
1410 INSN_ADDRESSES_NEW (operands[9],
1411 INSN_ADDRESSES (INSN_UID (insn))
1412 + get_attr_length (insn));
/* Output an IEEE-conformant floating-point compare-equal: a bt over an
   fcmp/eq, delegated to output_branchy_insn with code NE.  */
1417 output_ieee_ccmpeq (rtx insn, rtx *operands)
1419 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
/* TARGET_ASM_FILE_START hook: emit default prologue, the text-section
   directive (with attributes, ahead of dwarf2out -- see comment below),
   ".little" for little-endian targets, and the SHcompact/SHmedia mode
   and ABI directives for SH5 configurations.
   NOTE(review): elided numbered listing -- conditional guards between
   the embedded line numbers are missing from this view.  */
1425 sh_file_start (void)
1427 default_file_start ();
1430 /* We need to show the text section with the proper
1431 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1432 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1433 will complain. We can teach GAS specifically about the
1434 default attributes for our choice of text section, but
1435 then we would have to change GAS again if/when we change
1436 the text section name. */
1437 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1439 /* Switch to the data section so that the coffsem symbol
1440 isn't in the text section. */
1443 if (TARGET_LITTLE_ENDIAN)
1444 fputs ("\t.little\n", asm_out_file);
1448 if (TARGET_SHCOMPACT)
1449 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1450 else if (TARGET_SHMEDIA)
1451 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1452 TARGET_SHMEDIA64 ? 64 : 32);
/* Recursively test whether PAT contains an UNSPEC_CALLER unspec,
   descending through unary operands and both operands of binary rtxes.
   NOTE(review): elided listing -- the case labels and return values of
   this switch are partly missing between the embedded line numbers.  */
1456 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1459 unspec_caller_rtx_p (rtx pat)
1461 switch (GET_CODE (pat))
1464 return unspec_caller_rtx_p (XEXP (pat, 0));
1467 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1469 return unspec_caller_rtx_p (XEXP (pat, 1));
1471 if (XINT (pat, 1) == UNSPEC_CALLER)
/* TARGET_CANNOT_COPY_INSN_P hook: after reload with PIC, an INSN whose
   SET_SRC contains UNSPEC_CALLER generates a unique label and therefore
   must not be duplicated.  asm statements and non-SET patterns are
   screened out first.
   NOTE(review): elided listing -- the early-return values between the
   embedded line numbers are missing from this view.  */
1484 sh_cannot_copy_insn_p (rtx insn)
1488 if (!reload_completed || !flag_pic)
1491 if (GET_CODE (insn) != INSN)
1493 if (asm_noperands (insn) >= 0)
1496 pat = PATTERN (insn);
1497 if (GET_CODE (pat) != SET)
1499 pat = SET_SRC (pat);
1501 if (unspec_caller_rtx_p (pat))
/* Constant-shift strategy tables, indexed by shift count 0..31:
   instruction counts (ashiftrt_insns, shift_insns, ext_shift_insns) and
   the component shift amounts that realize them (shift_amounts,
   ext_shift_amounts; negative entries are right shifts).  The ext_
   variants may clobber up to the three highmost bits and are used after
   sign/zero extension.  */
1507 /* Actual number of instructions used to make a shift by N. */
1508 static const char ashiftrt_insns[] =
1509 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1511 /* Left shift and logical right shift are the same. */
1512 static const char shift_insns[] =
1513 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1515 /* Individual shift amounts needed to get the above length sequences.
1516 One bit right shifts clobber the T bit, so when possible, put one bit
1517 shifts in the middle of the sequence, so the ends are eligible for
1518 branch delay slots. */
1519 static const short shift_amounts[32][5] = {
1520 {0}, {1}, {2}, {2, 1},
1521 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1522 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1523 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1524 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1525 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1526 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1527 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1529 /* Likewise, but for shift amounts < 16, up to three highmost bits
1530 might be clobbered. This is typically used when combined with some
1531 kind of sign or zero extension. */
1533 static const char ext_shift_insns[] =
1534 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1536 static const short ext_shift_amounts[32][4] = {
1537 {0}, {1}, {2}, {2, 1},
1538 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1539 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1540 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1541 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1542 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1543 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1544 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1546 /* Assuming we have a value that has been sign-extended by at least one bit,
1547 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1548 to shift it by N without data loss, and quicker than by other means? */
/* True for n in {7, 8, 15} union with n==15 pattern: (n|8)==15 holds for 7 and 15. */
1549 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
/* Return the number of machine insns for the constant shift inside
   INSN's PARALLEL (element 0), looking the count up in ashiftrt_insns
   or shift_insns according to the shift rtx code.  Used by sh.md
   length attributes.
   NOTE(review): elided listing -- the switch framing around the two
   returns is missing between the embedded line numbers.  */
1555 shift_insns_rtx (rtx insn)
1557 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1558 int shift_count = INTVAL (XEXP (set_src, 1));
1559 enum rtx_code shift_code = GET_CODE (set_src);
1564 return ashiftrt_insns[shift_count];
1567 return shift_insns[shift_count];
/* shiftcosts: cost in insns of a shift rtx X.  Multi-word shifts are
   screened first (DImode shift-by-1 is special-cased); a non-constant
   count costs SH_DYNAMIC_SHIFT_COST; constant counts come from the
   ashiftrt_insns/shift_insns tables, with ashiftrt capped at
   1 + SH_DYNAMIC_SHIFT_COST where shad is cheaper.
   NOTE(review): the function's definition line and several return
   statements are elided from this numbered listing.  */
1573 /* Return the cost of a shift. */
1583 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1585 if (GET_MODE (x) == DImode
1586 && GET_CODE (XEXP (x, 1)) == CONST_INT
1587 && INTVAL (XEXP (x, 1)) == 1)
1590 /* Everything else is invalid, because there is no pattern for it. */
1593 /* If shift by a non constant, then this will be expensive. */
1594 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1595 return SH_DYNAMIC_SHIFT_COST;
1597 value = INTVAL (XEXP (x, 1));
1599 /* Otherwise, return the true cost in instructions. */
1600 if (GET_CODE (x) == ASHIFTRT)
1602 int cost = ashiftrt_insns[value];
1603 /* If SH3, then we put the constant in a reg and use shad. */
1604 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1605 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1609 return shift_insns[value];
/* andcosts: cost in insns of an AND rtx.  Register operands are one
   cycle; 0xff/0xffff map to single extu.[bw] insns; K08 immediates cost
   a little more (they tie up r0); I08 immediates need a mov + and; other
   constants need a pc-relative load plus the and.
   NOTE(review): the definition line and several return values are
   elided from this numbered listing.  */
1612 /* Return the cost of an AND operation. */
1619 /* Anding with a register is a single cycle and instruction. */
1620 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1623 i = INTVAL (XEXP (x, 1));
1627 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1628 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1629 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1635 /* These constants are single cycle extu.[bw] instructions. */
1636 if (i == 0xff || i == 0xffff)
1638 /* Constants that can be used in an and immediate instruction in a single
1639 cycle, but this requires r0, so make it a little more expensive. */
1640 if (CONST_OK_FOR_K08 (i))
1642 /* Constants that can be loaded with a mov immediate and an and.
1643 This case is probably unnecessary. */
1644 if (CONST_OK_FOR_I08 (i))
1646 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1647 This case is probably unnecessary. */
/* addsubcosts: cost of an add/sub.  Register or small-constant operands
   are one insn; on SHmedia, larger constants are graded by how many
   16-bit chunks (I16 checks on successively shifted values) are needed,
   up to a pc-relative load.
   NOTE(review): the definition line, case labels and return values are
   elided from this numbered listing.  */
1651 /* Return the cost of an addition or a subtraction. */
1656 /* Adding a register is a single cycle insn. */
1657 if (GET_CODE (XEXP (x, 1)) == REG
1658 || GET_CODE (XEXP (x, 1)) == SUBREG)
1661 /* Likewise for small constants. */
1662 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1663 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1667 switch (GET_CODE (XEXP (x, 1)))
1672 return TARGET_SHMEDIA64 ? 5 : 3;
1675 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1677 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1679 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1687 /* Any other constant requires a 2 cycle pc-relative load plus an
1692 /* Return the cost of a multiply. */
1692 /* Return the cost of a multiply. */
1694 multcosts (rtx x ATTRIBUTE_UNUSED)
1701 /* We have a mul insn, so we can never take more than the mul and the
1702 read of the mac reg, but count more because of the latency and extra
1704 if (TARGET_SMALLCODE)
1709 /* If we're aiming at small code, then just count the number of
1710 insns in a multiply call sequence. */
1711 if (TARGET_SMALLCODE)
1714 /* Otherwise count all the insns in the routine we'd be calling too. */
/* TARGET_RTX_COSTS hook.  Grades CONST_INT by which immediate constraint
   it satisfies in its OUTER_CODE context (I10/I16 chunks on SHmedia,
   I08/K08 otherwise), then dispatches PLUS/AND/MULT/shift codes to the
   addsubcosts/andcosts/multcosts/shiftcosts helpers above; division-like
   codes get a flat COSTS_N_INSNS (20).
   NOTE(review): elided numbered listing -- case labels, several *total
   assignments and the return statements are missing from this view.  */
1718 /* Compute a (partial) cost for rtx X. Return true if the complete
1719 cost has been computed, and false if subexpressions should be
1720 scanned. In either case, *TOTAL contains the cost result. */
1723 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1730 if (INTVAL (x) == 0)
1732 else if (outer_code == AND && and_operand ((x), DImode))
1734 else if ((outer_code == IOR || outer_code == XOR
1735 || outer_code == PLUS)
1736 && CONST_OK_FOR_I10 (INTVAL (x)))
1738 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1739 *total = COSTS_N_INSNS (outer_code != SET);
1740 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1741 *total = COSTS_N_INSNS (2);
1742 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1743 *total = COSTS_N_INSNS (3);
1745 *total = COSTS_N_INSNS (4);
1748 if (CONST_OK_FOR_I08 (INTVAL (x)))
1750 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1751 && CONST_OK_FOR_K08 (INTVAL (x)))
1760 if (TARGET_SHMEDIA64)
1761 *total = COSTS_N_INSNS (4);
1762 else if (TARGET_SHMEDIA32)
1763 *total = COSTS_N_INSNS (2);
1770 *total = COSTS_N_INSNS (4);
1776 *total = COSTS_N_INSNS (addsubcosts (x));
1780 *total = COSTS_N_INSNS (andcosts (x));
1784 *total = COSTS_N_INSNS (multcosts (x));
1790 *total = COSTS_N_INSNS (shiftcosts (x));
1797 *total = COSTS_N_INSNS (20);
/* TARGET_ADDRESS_COST hook: 1 for reg+reg (non-SHmedia) addresses since
   they pressure r0, 0 for everything else.  */
1810 /* Compute the cost of an address. For the SH, all valid addresses are
1811 the same cost. Use a slightly higher cost for reg + reg addressing,
1812 since it increases pressure on r0. */
1815 sh_address_cost (rtx X)
1817 return (GET_CODE (X) == PLUS
1818 && ! CONSTANT_P (XEXP (X, 1))
1819 && ! TARGET_SHMEDIA ? 1 : 0);
/* Emit one SImode shift of REG by N in direction TYPE (negative N values
   come from the shift_amounts tables and denote the opposite direction),
   selecting among ashrsi3_k / lshrsi3_m / lshrsi3_k / ashlsi3_std.
   NOTE(review): the switch framing and negative-N normalization are
   elided from this numbered listing.  */
1825 gen_ashift (int type, int n, rtx reg)
1827 /* Negative values here come from the shift_amounts array. */
1840 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1844 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1846 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1849 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
/* HImode counterpart of gen_ashift.  Right shifts are delegated to the
   SImode gen_ashift on a SImode SUBREG of REG (valid only where the
   caller guarantees sign extension works out); left shifts use
   ashlhi3_k directly.
   NOTE(review): the switch framing is elided from this listing.  */
1857 gen_ashift_hi (int type, int n, rtx reg)
1859 /* Negative values here come from the shift_amounts array. */
1873 /* We don't have HImode right shift operations because using the
1874 ordinary 32 bit shift instructions for that doesn't generate proper
1875 zero/sign extension.
1876 gen_ashift_hi is only called in contexts where we know that the
1877 sign extension works out correctly. */
1880 if (GET_CODE (reg) == SUBREG)
1882 offset = SUBREG_BYTE (reg);
1883 reg = SUBREG_REG (reg);
1885 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1889 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
/* Expand a constant SImode shift into the component SH shifts given by
   shift_amounts[].  The count is masked to 0..31; 31-bit shifts have
   special two-insn forms (rotl+movt for LSHIFTRT; and+rotl for ASHIFT
   when the operand is r0); a zero count emits a nop so final's
   try_split does not abort.
   NOTE(review): elided listing -- some framing statements between the
   embedded line numbers are missing.  */
1898 gen_shifty_op (int code, rtx *operands)
1900 int value = INTVAL (operands[2]);
1903 /* Truncate the shift count in case it is out of bounds. */
1904 value = value & 0x1f;
1908 if (code == LSHIFTRT)
1910 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1911 emit_insn (gen_movt (operands[0]));
1914 else if (code == ASHIFT)
1916 /* There is a two instruction sequence for 31 bit left shifts,
1917 but it requires r0. */
1918 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1920 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1921 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1926 else if (value == 0)
1928 /* This can happen when not optimizing. We must output something here
1929 to prevent the compiler from aborting in final.c after the try_split
1931 emit_insn (gen_nop ());
1935 max = shift_insns[value];
1936 for (i = 0; i < max; i++)
1937 gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Like gen_shifty_op but uses the ext_shift_amounts sequences, which are
   shorter because the topmost bits are known not to matter.  Picks
   gen_ashift_hi for HImode operands; right shifts are emitted in
   reverse table order so solitary negative amounts come first.  */
1940 /* Same as above, but optimized for values where the topmost bits don't
1944 gen_shifty_hi_op (int code, rtx *operands)
1946 int value = INTVAL (operands[2]);
1948 void (*gen_fun) (int, int, rtx);
1950 /* This operation is used by and_shl for SImode values with a few
1951 high bits known to be cleared. */
1955 emit_insn (gen_nop ());
1959 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1962 max = ext_shift_insns[value];
1963 for (i = 0; i < max; i++)
1964 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1967 /* When shifting right, emit the shifts in reverse order, so that
1968 solitary negative values come first. */
1969 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1970 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* Expand an SImode arithmetic right shift.  Strategies, in order:
   dynamic shad with a negated count (non-constant counts, or when the
   table sequence is too long); the shift-by-31 pattern; 16..19 via the
   ashrsi2_16 helper plus single-bit shifts; short counts (<= 5) inline;
   everything else calls the __ashiftrt_r4_N library routine with the
   value in r4.
   NOTE(review): elided listing -- return statements, loop headers and
   some declarations between the embedded line numbers are missing.  */
1973 /* Output RTL for an arithmetic right shift. */
1975 /* ??? Rewrite to use super-optimizer sequences. */
1978 expand_ashiftrt (rtx *operands)
1988 if (GET_CODE (operands[2]) != CONST_INT)
1990 rtx count = copy_to_mode_reg (SImode, operands[2]);
1991 emit_insn (gen_negsi2 (count, count));
1992 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1995 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1996 > 1 + SH_DYNAMIC_SHIFT_COST)
1999 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2000 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2004 if (GET_CODE (operands[2]) != CONST_INT)
2007 value = INTVAL (operands[2]) & 31;
2011 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2014 else if (value >= 16 && value <= 19)
2016 wrk = gen_reg_rtx (SImode);
2017 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2020 gen_ashift (ASHIFTRT, 1, wrk);
2021 emit_move_insn (operands[0], wrk);
2024 /* Expand a short sequence inline, longer call a magic routine. */
2025 else if (value <= 5)
2027 wrk = gen_reg_rtx (SImode);
2028 emit_move_insn (wrk, operands[1]);
2030 gen_ashift (ASHIFTRT, 1, wrk);
2031 emit_move_insn (operands[0], wrk);
2035 wrk = gen_reg_rtx (Pmode);
2037 /* Load the value into an arg reg and call a helper. */
2038 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2039 sprintf (func, "__ashiftrt_r4_%d", value);
2040 func_name = get_identifier (func);
2041 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2042 emit_move_insn (wrk, sym);
2043 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2044 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* True when a dynamic shift is cheaper than the constant-shift sequence
   for COUNT (table cost exceeds 1 + SH_DYNAMIC_SHIFT_COST).  */
2049 sh_dynamicalize_shift_p (rtx count)
2051 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
/* Classify how to implement (reg << LEFT) & MASK -- see the comment
   block below for the meaning of return values 0..4.  Evaluates, in
   order: plain right/left shift pairs, zero-extend based sequences
   (8- and 16-bit widths, including a split left shift around the
   extend), the r0 AND-immediate pattern, and a scratch-register AND;
   keeps the cheapest.  Best right-shift width and sequence length are
   stored through ATTRP when non-null.
   NOTE(review): elided listing -- the kind assignments inside each
   "cost < best_cost" body and the final return are missing between the
   embedded line numbers.  */
2054 /* Try to find a good way to implement the combiner pattern
2055 [(set (match_operand:SI 0 "register_operand" "r")
2056 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2057 (match_operand:SI 2 "const_int_operand" "n"))
2058 (match_operand:SI 3 "const_int_operand" "n"))) .
2059 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2060 return 0 for simple right / left or left/right shift combination.
2061 return 1 for a combination of shifts with zero_extend.
2062 return 2 for a combination of shifts with an AND that needs r0.
2063 return 3 for a combination of shifts with an AND that needs an extra
2064 scratch register, when the three highmost bits of the AND mask are clear.
2065 return 4 for a combination of shifts with an AND that needs an extra
2066 scratch register, when any of the three highmost bits of the AND mask
2068 If ATTRP is set, store an initial right shift width in ATTRP[0],
2069 and the instruction length in ATTRP[1] . These values are not valid
2071 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2072 shift_amounts for the last shift value that is to be used before the
2075 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2077 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2078 int left = INTVAL (left_rtx), right;
2080 int cost, best_cost = 10000;
2081 int best_right = 0, best_len = 0;
2085 if (left < 0 || left > 31)
2087 if (GET_CODE (mask_rtx) == CONST_INT)
2088 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2090 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2091 /* Can this be expressed as a right shift / left shift pair? */
2092 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2093 right = exact_log2 (lsb);
2094 mask2 = ~(mask + lsb - 1);
2095 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2096 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2098 best_cost = shift_insns[right] + shift_insns[right + left];
2099 /* mask has no trailing zeroes <==> ! right */
2100 else if (! right && mask2 == ~(lsb2 - 1))
2102 int late_right = exact_log2 (lsb2);
2103 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2105 /* Try to use zero extend. */
2106 if (mask2 == ~(lsb2 - 1))
2110 for (width = 8; width <= 16; width += 8)
2112 /* Can we zero-extend right away? */
2113 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2116 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2117 if (cost < best_cost)
2128 /* ??? Could try to put zero extend into initial right shift,
2129 or even shift a bit left before the right shift. */
2130 /* Determine value of first part of left shift, to get to the
2131 zero extend cut-off point. */
2132 first = width - exact_log2 (lsb2) + right;
2133 if (first >= 0 && right + left - first >= 0)
2135 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2136 + ext_shift_insns[right + left - first];
2137 if (cost < best_cost)
2149 /* Try to use r0 AND pattern */
2150 for (i = 0; i <= 2; i++)
2154 if (! CONST_OK_FOR_K08 (mask >> i))
2156 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2157 if (cost < best_cost)
2162 best_len = cost - 1;
2165 /* Try to use a scratch register to hold the AND operand. */
2166 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2167 for (i = 0; i <= 2; i++)
2171 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2172 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2173 if (cost < best_cost)
2178 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2184 attrp[0] = best_right;
2185 attrp[1] = best_len;
/* Length-attribute helper: extract the shift count and mask from INSN's
   PARALLEL and return the insn length computed by shl_and_kind
   (attributes[1]).  */
2190 /* This is used in length attributes of the unnamed instructions
2191 corresponding to shl_and_kind return values of 1 and 2. */
2193 shl_and_length (rtx insn)
2195 rtx set_src, left_rtx, mask_rtx;
2198 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2199 left_rtx = XEXP (XEXP (set_src, 0), 1);
2200 mask_rtx = XEXP (set_src, 1);
2201 shl_and_kind (left_rtx, mask_rtx, attributes);
2202 return attributes[1];
/* Length-attribute helper for and_shl_scratch: sum the table lengths of
   the outer shift, the inner shift (+1 for the AND), and the innermost
   shift of the nested rtx.  */
2205 /* This is used in length attribute of the and_shl_scratch instruction. */
2208 shl_and_scr_length (rtx insn)
2210 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2211 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2212 rtx op = XEXP (set_src, 0);
2213 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2214 op = XEXP (XEXP (op, 0), 0);
2215 return len + shift_insns[INTVAL (XEXP (op, 1))];
2218 /* Generating rtl? */
2219 extern int rtx_equal_function_value_matters;
/* Generate RTL for (source << left) & mask according to the kind chosen
   by shl_and_kind: zero-extend sequences (kind 1, possibly with a split
   left shift around the extend), plain shift+AND (kinds 0/2, with the
   high don't-care bits of the mask widened for a shorter constant), or
   the and_shl_scratch pattern (kinds 3/4) with negated ext-shift
   amounts.  Fine-grained expansion is suppressed while combining
   (rtx_equal_function_value_matters / reload flags) so the split
   patterns can still match.
   NOTE(review): elided listing -- switch framing, some operand setup
   and returns are missing between the embedded line numbers.  */
2221 /* Generate rtl for instructions for which shl_and_kind advised a particular
2222 method of generating them, i.e. returned zero. */
2225 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2228 unsigned HOST_WIDE_INT mask;
2229 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2230 int right, total_shift;
2231 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2233 right = attributes[0];
2234 total_shift = INTVAL (left_rtx) + right;
2235 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2242 int first = attributes[2];
2247 emit_insn ((mask << right) <= 0xff
2248 ? gen_zero_extendqisi2 (dest,
2249 gen_lowpart (QImode, source))
2250 : gen_zero_extendhisi2 (dest,
2251 gen_lowpart (HImode, source)));
2255 emit_insn (gen_movsi (dest, source));
2259 operands[2] = GEN_INT (right);
2260 gen_shifty_hi_op (LSHIFTRT, operands);
2264 operands[2] = GEN_INT (first);
2265 gen_shifty_hi_op (ASHIFT, operands);
2266 total_shift -= first;
2270 emit_insn (mask <= 0xff
2271 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2272 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2273 if (total_shift > 0)
2275 operands[2] = GEN_INT (total_shift);
2276 gen_shifty_hi_op (ASHIFT, operands);
2281 shift_gen_fun = gen_shifty_op;
2283 /* If the topmost bit that matters is set, set the topmost bits
2284 that don't matter. This way, we might be able to get a shorter
2286 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2287 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2289 /* Don't expand fine-grained when combining, because that will
2290 make the pattern fail. */
2291 if (rtx_equal_function_value_matters
2292 || reload_in_progress || reload_completed)
2296 /* Cases 3 and 4 should be handled by this split
2297 only while combining */
2302 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2305 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2310 operands[2] = GEN_INT (total_shift);
2311 shift_gen_fun (ASHIFT, operands);
2318 if (kind != 4 && total_shift < 16)
2320 neg = -ext_shift_amounts[total_shift][1];
2322 neg -= ext_shift_amounts[total_shift][2];
2326 emit_insn (gen_and_shl_scratch (dest, source,
2329 GEN_INT (total_shift + neg),
2331 emit_insn (gen_movsi (dest, dest));
/* Classify how to implement a left-shift-then-sign-extract -- see the
   comment block below for return values 0..7.  Compares, by table cost:
   plain left/right shift (default), 16-bit shift + sign extend + shift,
   plain sign extends between two shifts (kinds 1-4, with a sloppy shift
   + final signed shift or final sign extend as fallbacks), sign
   extension in r0 (kinds 6/7), and a dynamic shift.  Stores the winning
   cost through COSTP when non-null.
   NOTE(review): elided listing -- the kind assignments and the final
   return are missing between the embedded line numbers.  */
2338 /* Try to find a good way to implement the combiner pattern
2339 [(set (match_operand:SI 0 "register_operand" "=r")
2340 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2341 (match_operand:SI 2 "const_int_operand" "n")
2342 (match_operand:SI 3 "const_int_operand" "n")
2344 (clobber (reg:SI T_REG))]
2345 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2346 return 0 for simple left / right shift combination.
2347 return 1 for left shift / 8 bit sign extend / left shift.
2348 return 2 for left shift / 16 bit sign extend / left shift.
2349 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2350 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2351 return 5 for left shift / 16 bit sign extend / right shift
2352 return 6 for < 8 bit sign extend / left shift.
2353 return 7 for < 8 bit sign extend / left shift / single right shift.
2354 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2357 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2359 int left, size, insize, ext;
2360 int cost = 0, best_cost;
2363 left = INTVAL (left_rtx);
2364 size = INTVAL (size_rtx);
2365 insize = size - left;
2368 /* Default to left / right shift. */
2370 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2373 /* 16 bit shift / sign extend / 16 bit shift */
2374 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2375 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2376 below, by alternative 3 or something even better. */
2377 if (cost < best_cost)
2383 /* Try a plain sign extend between two shifts. */
2384 for (ext = 16; ext >= insize; ext -= 8)
2388 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2389 if (cost < best_cost)
2391 kind = ext / (unsigned) 8;
2395 /* Check if we can do a sloppy shift with a final signed shift
2396 restoring the sign. */
2397 if (EXT_SHIFT_SIGNED (size - ext))
2398 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2399 /* If not, maybe it's still cheaper to do the second shift sloppy,
2400 and do a final sign extend? */
2401 else if (size <= 16)
2402 cost = ext_shift_insns[ext - insize] + 1
2403 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2406 if (cost < best_cost)
2408 kind = ext / (unsigned) 8 + 2;
2412 /* Check if we can sign extend in r0 */
2415 cost = 3 + shift_insns[left];
2416 if (cost < best_cost)
2421 /* Try the same with a final signed shift. */
2424 cost = 3 + ext_shift_insns[left + 1] + 1;
2425 if (cost < best_cost)
2434 /* Try to use a dynamic shift. */
2435 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2436 if (cost < best_cost)
/* Length-attribute helper: extract the shift/size operands from INSN's
   PARALLEL and return the cost computed by shl_sext_kind.  */
2447 /* Function to be used in the length attribute of the instructions
2448 implementing this pattern. */
2451 shl_sext_length (rtx insn)
2453 rtx set_src, left_rtx, size_rtx;
2456 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2457 left_rtx = XEXP (XEXP (set_src, 0), 1);
2458 size_rtx = XEXP (set_src, 1);
2459 shl_sext_kind (left_rtx, size_rtx, &cost);
/* Generate RTL for the shift/sign-extract combination classified by
   shl_sext_kind: kinds 1-4 use a left shift, an 8/16-bit sign extend,
   and a second shift (with the sloppy-shift + final-signed-shift or
   final-sign-extend variants); kind 5 shifts, extends, then emits the
   ashifts via gen_ashift to avoid new pseudos; kinds 6/7 sign extend via
   and/xor/add arithmetic before the left shift (kind 7 over-shifts by
   one and corrects with ashrsi3_k).  While combining, the shl_sext_ext
   split pattern is emitted instead of fine-grained RTL.
   NOTE(review): elided listing -- switch framing, case labels and
   return statements are missing between the embedded line numbers.  */
2463 /* Generate rtl for this pattern */
2466 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2469 int left, size, insize, cost;
2472 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2473 left = INTVAL (left_rtx);
2474 size = INTVAL (size_rtx);
2475 insize = size - left;
2483 int ext = kind & 1 ? 8 : 16;
2484 int shift2 = size - ext;
2486 /* Don't expand fine-grained when combining, because that will
2487 make the pattern fail. */
2488 if (! rtx_equal_function_value_matters
2489 && ! reload_in_progress && ! reload_completed)
2491 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2492 emit_insn (gen_movsi (dest, source));
2496 emit_insn (gen_movsi (dest, source));
2500 operands[2] = GEN_INT (ext - insize);
2501 gen_shifty_hi_op (ASHIFT, operands);
2504 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2505 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2510 operands[2] = GEN_INT (shift2);
2511 gen_shifty_op (ASHIFT, operands);
2518 if (EXT_SHIFT_SIGNED (shift2))
2520 operands[2] = GEN_INT (shift2 + 1);
2521 gen_shifty_op (ASHIFT, operands);
2522 operands[2] = const1_rtx;
2523 gen_shifty_op (ASHIFTRT, operands);
2526 operands[2] = GEN_INT (shift2);
2527 gen_shifty_hi_op (ASHIFT, operands);
2531 operands[2] = GEN_INT (-shift2);
2532 gen_shifty_hi_op (LSHIFTRT, operands);
2534 emit_insn (size <= 8
2535 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2536 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2543 if (! rtx_equal_function_value_matters
2544 && ! reload_in_progress && ! reload_completed)
2545 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2549 operands[2] = GEN_INT (16 - insize);
2550 gen_shifty_hi_op (ASHIFT, operands);
2551 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2553 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2555 gen_ashift (ASHIFTRT, 1, dest);
2560 /* Don't expand fine-grained when combining, because that will
2561 make the pattern fail. */
2562 if (! rtx_equal_function_value_matters
2563 && ! reload_in_progress && ! reload_completed)
2565 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2566 emit_insn (gen_movsi (dest, source));
2569 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2570 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2571 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2573 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2574 gen_shifty_op (ASHIFT, operands);
2576 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2584 /* Prefix a symbol_ref name with "datalabel". */
2587 gen_datalabel_ref (rtx sym)
/* A label reference has no name to prefix, so wrap it in a
   (const (unspec ...)) marker instead.  */
2589 if (GET_CODE (sym) == LABEL_REF)
2590 return gen_rtx_CONST (GET_MODE (sym),
2591 gen_rtx_UNSPEC (GET_MODE (sym),
/* Anything other than a SYMBOL_REF (or the LABEL_REF above) is invalid
   input here.  */
2595 if (GET_CODE (sym) != SYMBOL_REF)
2602 /* The SH cannot load a large constant into a register, constants have to
2603 come from a pc relative load. The reference of a pc relative load
2604 instruction must be less than 1k in front of the instruction. This
2605 means that we often have to dump a constant inside a function, and
2606 generate code to branch around it.
2608 It is important to minimize this, since the branches will slow things
2609 down and make things bigger.
2611 Worst case code looks like:
2629 We fix this by performing a scan before scheduling, which notices which
2630 instructions need to have their operands fetched from the constant table
2631 and builds the table.
2635 scan, find an instruction which needs a pcrel move. Look forward, find the
2636 last barrier which is within MAX_COUNT bytes of the requirement.
2637 If there isn't one, make one. Process all the instructions between
2638 the find and the barrier.
2640 In the above example, we can tell that L3 is within 1k of L1, so
2641 the first move can be shrunk from the 3 insn+constant sequence into
2642 just 1 insn, and the constant moved to L3 to make:
2653 Then the second move becomes the target for the shortening process. */
/* One entry of the pending constant pool.  */
2657 rtx value; /* Value in table. */
2658 rtx label; /* Label of value. */
2659 rtx wend; /* End of window. */
2660 enum machine_mode mode; /* Mode of value. */
2662 /* True if this constant is accessed as part of a post-increment
2663 sequence. Note that HImode constants are never accessed in this way. */
2664 bool part_of_sequence_p;
2667 /* The maximum number of constants that can fit into one pool, since
2668 the pc relative range is 0...1020 bytes and constants are at least 4
2671 #define MAX_POOL_SIZE (1020/4)
2672 static pool_node pool_vector[MAX_POOL_SIZE];
/* Current number of entries in pool_vector.  */
2673 static int pool_size;
/* Label and index of the most recent pool "window"; add_constant chains
   window-end references through these.  */
2674 static rtx pool_window_label;
2675 static int pool_window_last;
2677 /* ??? If we need a constant in HImode which is the truncated value of a
2678 constant we need in SImode, we could combine the two entries thus saving
2679 two bytes. Is this common enough to be worth the effort of implementing
2682 /* ??? This stuff should be done at the same time that we shorten branches.
2683 As it is now, we must assume that all branches are the maximum size, and
2684 this causes us to almost always output constant pools sooner than
2687 /* Add a constant to the pool and return its label. */
/* X is the constant, MODE its machine mode; LAST_VALUE, if nonnull, is
   the previously added constant, used to detect post-increment access
   sequences.  Reuses an existing pool entry when an equal value in the
   same mode is already present.  */
2690 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2693 rtx lab, new, ref, newref;
2695 /* First see if we've already got it. */
2696 for (i = 0; i < pool_size; i++)
2698 if (x->code == pool_vector[i].value->code
2699 && mode == pool_vector[i].mode)
/* Labels compare by their unique number, not structurally.  */
2701 if (x->code == CODE_LABEL)
2703 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2706 if (rtx_equal_p (x, pool_vector[i].value))
2711 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* Chain an extra label onto the existing entry for this use.  */
2713 new = gen_label_rtx ();
2714 LABEL_REFS (new) = pool_vector[i].label;
2715 pool_vector[i].label = lab = new;
2717 if (lab && pool_window_label)
2719 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2720 ref = pool_vector[pool_window_last].wend;
2721 LABEL_NEXTREF (newref) = ref;
2722 pool_vector[pool_window_last].wend = newref;
2725 pool_window_label = new;
2726 pool_window_last = i;
2732 /* Need a new one. */
2733 pool_vector[pool_size].value = x;
/* A value equal to the previous one marks a post-increment sequence.  */
2734 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2737 pool_vector[pool_size - 1].part_of_sequence_p = true;
2740 lab = gen_label_rtx ();
2741 pool_vector[pool_size].mode = mode;
2742 pool_vector[pool_size].label = lab;
2743 pool_vector[pool_size].wend = NULL_RTX;
2744 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2745 if (lab && pool_window_label)
2747 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2748 ref = pool_vector[pool_window_last].wend;
2749 LABEL_NEXTREF (newref) = ref;
2750 pool_vector[pool_window_last].wend = newref;
2753 pool_window_label = lab;
2754 pool_window_last = pool_size;
2759 /* Output the literal table. START, if nonzero, is the first instruction
2760 this table is needed for, and also indicates that there is at least one
2761 casesi_worker_2 instruction; We have to emit the operand3 labels from
2762 these insns at a 4-byte aligned position. BARRIER is the barrier
2763 after which we are to place the table. */
/* Emits the accumulated pool_vector contents after BARRIER, HImode
   constants first, then 4/8-byte constants with alignment handling;
   resets the pool window state at the end.  NOTE(review): some interior
   lines are elided in this excerpt.  */
2766 dump_table (rtx start, rtx barrier)
2774 /* Do two passes, first time dump out the HI sized constants. */
2776 for (i = 0; i < pool_size; i++)
2778 pool_node *p = &pool_vector[i];
2780 if (p->mode == HImode)
2784 scan = emit_insn_after (gen_align_2 (), scan);
/* Emit every chained label for this entry, then the value itself.  */
2787 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2788 scan = emit_label_after (lab, scan);
2789 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2791 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2793 lab = XEXP (ref, 0);
2794 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2797 else if (p->mode == DFmode)
2805 scan = emit_insn_after (gen_align_4 (), scan);
/* Emit operand-3 labels of any casesi_worker_2 insns at this 4-byte
   aligned position.  */
2807 for (; start != barrier; start = NEXT_INSN (start))
2808 if (GET_CODE (start) == INSN
2809 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2811 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2812 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2814 scan = emit_label_after (lab, scan);
/* With double-aligned DFmode values, interleave SImode entries into
   the 4-byte alignment padding slots where possible.  */
2817 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2819 rtx align_insn = NULL_RTX;
2821 scan = emit_label_after (gen_label_rtx (), scan);
2822 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2825 for (i = 0; i < pool_size; i++)
2827 pool_node *p = &pool_vector[i];
2835 if (align_insn && !p->part_of_sequence_p)
2837 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2838 emit_label_before (lab, align_insn);
2839 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2841 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2843 lab = XEXP (ref, 0);
2844 emit_insn_before (gen_consttable_window_end (lab),
/* The alignment slot has been consumed by this SImode entry.  */
2847 delete_insn (align_insn);
2848 align_insn = NULL_RTX;
2853 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2854 scan = emit_label_after (lab, scan);
2855 scan = emit_insn_after (gen_consttable_4 (p->value,
2857 need_align = ! need_align;
2863 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2868 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2869 scan = emit_label_after (lab, scan);
2870 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2878 if (p->mode != HImode)
2880 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2882 lab = XEXP (ref, 0);
2883 scan = emit_insn_after (gen_consttable_window_end (lab),
/* Simpler second pass used when the interleaving path above does not
   apply: 4-byte and 8-byte entries in pool order.  */
2892 for (i = 0; i < pool_size; i++)
2894 pool_node *p = &pool_vector[i];
2905 scan = emit_label_after (gen_label_rtx (), scan);
2906 scan = emit_insn_after (gen_align_4 (), scan);
2908 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2909 scan = emit_label_after (lab, scan);
2910 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2918 scan = emit_label_after (gen_label_rtx (), scan);
2919 scan = emit_insn_after (gen_align_4 (), scan);
2921 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2922 scan = emit_label_after (lab, scan);
2923 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2931 if (p->mode != HImode)
2933 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2935 lab = XEXP (ref, 0);
2936 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2941 scan = emit_insn_after (gen_consttable_end (), scan);
2942 scan = emit_barrier_after (scan);
/* Reset window bookkeeping for the next pool.  */
2944 pool_window_label = NULL_RTX;
2945 pool_window_last = 0;
2948 /* Return nonzero if constant would be an ok source for a
2949 mov.w instead of a mov.l. */
2954 return (GET_CODE (src) == CONST_INT
2955 && INTVAL (src) >= -32768
2956 && INTVAL (src) <= 32767);
2959 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2961 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2962 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2963 need to fix it if the input value is CONST_OK_FOR_I08. */
/* "Fixed" here means the constant must be moved into the literal pool
   and the move turned into a pc-relative load.  */
2966 broken_move (rtx insn)
2968 if (GET_CODE (insn) == INSN)
2970 rtx pat = PATTERN (insn);
/* Strip a surrounding PARALLEL so pat is the underlying SET.  */
2971 if (GET_CODE (pat) == PARALLEL)
2972 pat = XVECEXP (pat, 0, 0);
2973 if (GET_CODE (pat) == SET
2974 /* We can load any 8 bit value if we don't care what the high
2975 order bits end up as. */
2976 && GET_MODE (SET_DEST (pat)) != QImode
2977 && (CONSTANT_P (SET_SRC (pat))
2978 /* Match mova_const. */
2979 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2980 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2981 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2983 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2984 && (fp_zero_operand (SET_SRC (pat))
2985 || fp_one_operand (SET_SRC (pat)))
2986 /* ??? If this is a -m4 or -m4-single compilation, in general
2987 we don't know the current setting of fpscr, so disable fldi.
2988 There is an exception if this was a register-register move
2989 before reload - and hence it was ascertained that we have
2990 single precision setting - and in a post-reload optimization
2991 we changed this to do a constant load. In that case
2992 we don't have an r0 clobber, hence we must use fldi. */
2993 && (! TARGET_SH4 || TARGET_FMOVD
2994 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2996 && GET_CODE (SET_DEST (pat)) == REG
2997 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
/* Small integers loadable by an immediate mov need no pool entry.  */
2998 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2999 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3009 return (GET_CODE (insn) == INSN
3010 && GET_CODE (PATTERN (insn)) == SET
3011 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3012 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3013 /* Don't match mova_const. */
3014 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3017 /* Fix up a mova from a switch that went out of range. */
/* Either degrades the mova to a plain constant load (non-PIC case) or,
   for the casesi pattern, rewrites the matching casesi_worker_1 insn
   into casesi_worker_2 with a new reference label and makes the mova
   load a label difference instead.  */
3019 fixup_mova (rtx mova)
3023 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
/* Force re-recognition after the pattern surgery.  */
3024 INSN_CODE (mova) = -1;
3029 rtx lab = gen_label_rtx ();
3030 rtx wpat, wpat0, wpat1, wsrc, diff;
/* Scan forward for the casesi_worker_1 insn belonging to this mova.  */
3034 worker = NEXT_INSN (worker);
3036 || GET_CODE (worker) == CODE_LABEL
3037 || GET_CODE (worker) == JUMP_INSN)
3039 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3040 wpat = PATTERN (worker);
3041 wpat0 = XVECEXP (wpat, 0, 0);
3042 wpat1 = XVECEXP (wpat, 0, 1);
3043 wsrc = SET_SRC (wpat0);
3044 PATTERN (worker) = (gen_casesi_worker_2
3045 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3046 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3048 INSN_CODE (worker) = -1;
/* The mova now loads (table_label - lab), wrapped as a PIC unspec.  */
3049 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3050 gen_rtx_LABEL_REF (Pmode, lab));
3051 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3052 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3053 INSN_CODE (mova) = -1;
3057 /* Find the last barrier from insn FROM which is close enough to hold the
3058 constant pool. If we can't find one, then create one near the end of
/* NUM_MOVA counts pending mova insns whose label must stay in range;
   MOVA is the first of them.  Returns the barrier after which the pool
   should be dumped, creating one (jump around + barrier + label) when
   none is found in range.  NOTE(review): interior lines are elided in
   this excerpt.  */
3062 find_barrier (int num_mova, rtx mova, rtx from)
3071 int leading_mova = num_mova;
3072 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3076 /* For HImode: range is 510, add 4 because pc counts from address of
3077 second instruction after this one, subtract 2 for the jump instruction
3078 that we may need to emit before the table, subtract 2 for the instruction
3079 that fills the jump delay slot (in very rare cases, reorg will take an
3080 instruction from after the constant pool or will leave the delay slot
3081 empty). This gives 510.
3082 For SImode: range is 1020, add 4 because pc counts from address of
3083 second instruction after this one, subtract 2 in case pc is 2 byte
3084 aligned, subtract 2 for the jump instruction that we may need to emit
3085 before the table, subtract 2 for the instruction that fills the jump
3086 delay slot. This gives 1018. */
3088 /* The branch will always be shortened now that the reference address for
3089 forward branches is the successor address, thus we need no longer make
3090 adjustments to the [sh]i_limit for -O0. */
/* Scan forward accumulating byte counts until either range limit is
   about to be exceeded.  */
3095 while (from && count_si < si_limit && count_hi < hi_limit)
3097 int inc = get_attr_length (from);
3100 if (GET_CODE (from) == CODE_LABEL)
3103 new_align = 1 << label_to_alignment (from);
3104 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3105 new_align = 1 << barrier_align (from);
3111 if (GET_CODE (from) == BARRIER)
3114 found_barrier = from;
3116 /* If we are at the end of the function, or in front of an alignment
3117 instruction, we need not insert an extra alignment. We prefer
3118 this kind of barrier. */
3119 if (barrier_align (from) > 2)
3120 good_barrier = from;
/* A constant-loading move contributes an entry (and bytes) to the
   pending pool.  */
3123 if (broken_move (from))
3126 enum machine_mode mode;
3128 pat = PATTERN (from);
3129 if (GET_CODE (pat) == PARALLEL)
3130 pat = XVECEXP (pat, 0, 0);
3131 src = SET_SRC (pat);
3132 dst = SET_DEST (pat);
3133 mode = GET_MODE (dst);
3135 /* We must explicitly check the mode, because sometimes the
3136 front end will generate code to load unsigned constants into
3137 HImode targets without properly sign extending them. */
3139 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3142 /* We put the short constants before the long constants, so
3143 we must count the length of short constants in the range
3144 for the long constants. */
3145 /* ??? This isn't optimal, but is easy to do. */
3150 /* We dump DF/DI constants before SF/SI ones, because
3151 the limit is the same, but the alignment requirements
3152 are higher. We may waste up to 4 additional bytes
3153 for alignment, and the DF/DI constant may have
3154 another SF/SI constant placed before it. */
3155 if (TARGET_SHCOMPACT
3157 && (mode == DFmode || mode == DImode))
3162 while (si_align > 2 && found_si + si_align - 2 > count_si)
3164 if (found_si > count_si)
3165 count_si = found_si;
3166 found_si += GET_MODE_SIZE (mode);
3168 si_limit -= GET_MODE_SIZE (mode);
3171 /* See the code in machine_dependent_reorg, which has a similar if
3172 statement that generates a new mova insn in many cases. */
3173 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3183 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3185 if (found_si > count_si)
3186 count_si = found_si;
/* Jump tables (ADDR_VEC / ADDR_DIFF_VEC) are data; account for their
   size and treat their leading barrier as a dumping opportunity.  */
3188 else if (GET_CODE (from) == JUMP_INSN
3189 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3190 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3194 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3196 /* We have just passed the barrier in front of the
3197 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3198 the ADDR_DIFF_VEC is accessed as data, just like our pool
3199 constants, this is a good opportunity to accommodate what
3200 we have gathered so far.
3201 If we waited any longer, we could end up at a barrier in
3202 front of code, which gives worse cache usage for separated
3203 instruction / data caches. */
3204 good_barrier = found_barrier;
3209 rtx body = PATTERN (from);
3210 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3213 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3214 else if (GET_CODE (from) == JUMP_INSN
3216 && ! TARGET_SMALLCODE)
/* Alignment tightens the effective limits: account for padding.  */
3222 if (new_align > si_align)
3224 si_limit -= (count_si - 1) & (new_align - si_align);
3225 si_align = new_align;
3227 count_si = (count_si + new_align - 1) & -new_align;
3232 if (new_align > hi_align)
3234 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3235 hi_align = new_align;
3237 count_hi = (count_hi + new_align - 1) & -new_align;
3239 from = NEXT_INSN (from);
3246 /* Try as we might, the leading mova is out of range. Change
3247 it into a load (which will become a pcload) and retry. */
3249 return find_barrier (0, 0, mova);
3253 /* Insert the constant pool table before the mova instruction,
3254 to prevent the mova label reference from going out of range. */
3256 good_barrier = found_barrier = barrier_before_mova;
3262 if (good_barrier && next_real_insn (found_barrier))
3263 found_barrier = good_barrier;
3267 /* We didn't find a barrier in time to dump our stuff,
3268 so we'll make one. */
3269 rtx label = gen_label_rtx ();
3271 /* If we exceeded the range, then we must back up over the last
3272 instruction we looked at. Otherwise, we just need to undo the
3273 NEXT_INSN at the end of the loop. */
3274 if (count_hi > hi_limit || count_si > si_limit)
3275 from = PREV_INSN (PREV_INSN (from));
3277 from = PREV_INSN (from);
3279 /* Walk back to be just before any jump or label.
3280 Putting it before a label reduces the number of times the branch
3281 around the constant pool table will be hit. Putting it before
3282 a jump makes it more likely that the bra delay slot will be
3284 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3285 || GET_CODE (from) == CODE_LABEL)
3286 from = PREV_INSN (from);
/* Emit: jump over the table, a barrier, and the landing label.  */
3288 from = emit_jump_insn_after (gen_jump (label), from);
3289 JUMP_LABEL (from) = label;
3290 LABEL_NUSES (label) = 1;
3291 found_barrier = emit_barrier_after (from);
3292 emit_label_after (label, found_barrier);
3295 return found_barrier;
3298 /* If the instruction INSN is implemented by a special function, and we can
3299 positively find the register that is used to call the sfunc, and this
3300 register is not used anywhere else in this instruction - except as the
3301 destination of a set, return this register; else, return 0. */
3303 sfunc_uses_reg (rtx insn)
3306 rtx pattern, part, reg_part, reg;
3308 if (GET_CODE (insn) != INSN)
3310 pattern = PATTERN (insn);
/* Only PARALLEL patterns of attribute type sfunc qualify.  */
3311 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Locate the (use (reg:SI ...)) that names the call register.  */
3314 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3316 part = XVECEXP (pattern, 0, i);
3317 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3322 reg = XEXP (reg_part, 0);
/* Reject the register if any other part of the insn mentions it,
   except as a SET destination or in a CLOBBER.  */
3323 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3325 part = XVECEXP (pattern, 0, i);
3326 if (part == reg_part || GET_CODE (part) == CLOBBER)
3328 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3329 && GET_CODE (SET_DEST (part)) == REG)
3330 ? SET_SRC (part) : part)))
3336 /* See if the only way in which INSN uses REG is by calling it, or by
3337 setting it while calling it. Set *SET to a SET rtx if the register
/* Returns nonzero when INSN uses REG in some way other than as the
   called address (or as a set-while-calling destination).
   NOTE(review): some interior lines are elided in this excerpt.  */
3341 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
/* sfunc insns call through the register found by sfunc_uses_reg.  */
3347 reg2 = sfunc_uses_reg (insn);
3348 if (reg2 && REGNO (reg2) == REGNO (reg))
3350 pattern = single_set (insn);
3352 && GET_CODE (SET_DEST (pattern)) == REG
3353 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3357 if (GET_CODE (insn) != CALL_INSN)
3359 /* We don't use rtx_equal_p because we don't care if the mode is
3361 pattern = single_set (insn);
3363 && GET_CODE (SET_DEST (pattern)) == REG
3364 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3370 par = PATTERN (insn);
3371 if (GET_CODE (par) == PARALLEL)
3372 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3374 part = XVECEXP (par, 0, i);
3375 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3378 return reg_mentioned_p (reg, SET_SRC (pattern));
3384 pattern = PATTERN (insn);
3386 if (GET_CODE (pattern) == PARALLEL)
/* Any mention of REG outside element 0 of the PARALLEL counts as a
   non-call use.  */
3390 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3391 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3393 pattern = XVECEXP (pattern, 0, 0);
3396 if (GET_CODE (pattern) == SET)
3398 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3400 /* We don't use rtx_equal_p, because we don't care if the
3401 mode is different. */
3402 if (GET_CODE (SET_DEST (pattern)) != REG
3403 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3409 pattern = SET_SRC (pattern);
/* Finally the call itself: (call (mem REG) ...) is the only allowed
   remaining use.  */
3412 if (GET_CODE (pattern) != CALL
3413 || GET_CODE (XEXP (pattern, 0)) != MEM
3414 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3420 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3421 general registers. Bits 0..15 mean that the respective registers
3422 are used as inputs in the instruction. Bits 16..31 mean that the
3423 registers 0..15, respectively, are used as outputs, or are clobbered.
3424 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3426 regs_used (rtx x, int is_dest)
3434 code = GET_CODE (x);
/* REG: set one bit per hard register the value occupies, shifted into
   the output half when IS_DEST is 16.  */
3439 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3440 << (REGNO (x) + is_dest));
3444 rtx y = SUBREG_REG (x);
3446 if (GET_CODE (y) != REG)
/* SUBREG of a hard reg: account for the subreg offset.  */
3449 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3451 subreg_regno_offset (REGNO (y),
3454 GET_MODE (x)) + is_dest));
/* SET: source regs are inputs, destination regs are outputs.  */
3458 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3460 /* If there was a return value, it must have been indicated with USE. */
/* Generic recursion over the rtx format string.  */
3475 fmt = GET_RTX_FORMAT (code);
3477 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3482 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3483 used |= regs_used (XVECEXP (x, i, j), is_dest);
3485 else if (fmt[i] == 'e')
3486 used |= regs_used (XEXP (x, i), is_dest);
3491 /* Create an instruction that prevents redirection of a conditional branch
3492 to the destination of the JUMP with address ADDR.
3493 If the branch needs to be implemented as an indirect jump, try to find
3494 a scratch register for it.
3495 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3496 If any preceding insn that doesn't fit into a delay slot is good enough,
3497 pass 1. Pass 2 if a definite blocking insn is needed.
3498 -1 is used internally to avoid deep recursion.
3499 If a blocking instruction is made or recognized, return it. */
3502 gen_block_redirect (rtx jump, int addr, int need_block)
3505 rtx prev = prev_nonnote_insn (jump);
3508 /* First, check if we already have an instruction that satisfies our need. */
3509 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3511 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3513 if (GET_CODE (PATTERN (prev)) == USE
3514 || GET_CODE (PATTERN (prev)) == CLOBBER
3515 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3517 else if ((need_block &= ~1) < 0)
3519 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3522 if (GET_CODE (PATTERN (jump)) == RETURN)
3526 /* Reorg even does nasty things with return insns that cause branches
3527 to go out of range - see find_end_label and callers. */
3528 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3530 /* We can't use JUMP_LABEL here because it might be undefined
3531 when not optimizing. */
3532 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3533 /* If the branch is out of range, try to find a scratch register for it. */
3535 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3539 /* Don't look for the stack pointer as a scratch register,
3540 it would cause trouble if an interrupt occurred. */
3541 unsigned try = 0x7fff, used;
3542 int jump_left = flag_expensive_optimizations + 1;
3544 /* It is likely that the most recent eligible instruction is wanted for
3545 the delay slot. Therefore, find out which registers it uses, and
3546 try to avoid using them. */
3548 for (scan = jump; (scan = PREV_INSN (scan)); )
3552 if (INSN_DELETED_P (scan))
3554 code = GET_CODE (scan);
3555 if (code == CODE_LABEL || code == JUMP_INSN)
3558 && GET_CODE (PATTERN (scan)) != USE
3559 && GET_CODE (PATTERN (scan)) != CLOBBER
3560 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3562 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump target to find a register that is dead
   there (written before being read), which can serve as scratch.  */
3566 for (used = dead = 0, scan = JUMP_LABEL (jump);
3567 (scan = NEXT_INSN (scan)); )
3571 if (INSN_DELETED_P (scan))
3573 code = GET_CODE (scan);
3576 used |= regs_used (PATTERN (scan), 0);
3577 if (code == CALL_INSN)
3578 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
/* A register written (bits 16..31) before any read is dead here.  */
3579 dead |= (used >> 16) & ~used;
/* Follow simple jumps a bounded number of times.  */
3585 if (code == JUMP_INSN)
3587 if (jump_left-- && simplejump_p (scan))
3588 scan = JUMP_LABEL (scan);
3594 /* Mask out the stack pointer again, in case it was
3595 the only 'free' register we have found. */
3598 /* If the immediate destination is still in range, check for possible
3599 threading with a jump beyond the delay slot insn.
3600 Don't check if we are called recursively; the jump has been or will be
3601 checked in a different invocation then. */
3603 else if (optimize && need_block >= 0)
3605 rtx next = next_active_insn (next_active_insn (dest));
3606 if (next && GET_CODE (next) == JUMP_INSN
3607 && GET_CODE (PATTERN (next)) == SET
3608 && recog_memoized (next) == CODE_FOR_jump_compact)
3610 dest = JUMP_LABEL (next);
3612 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3614 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* Found a dead register: reserve it with an indirect_jump_scratch
   insn so later passes keep it free for the far branch.  */
3620 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3622 /* It would be nice if we could convert the jump into an indirect
3623 jump / far branch right now, and thus exposing all constituent
3624 instructions to further optimization. However, reorg uses
3625 simplejump_p to determine if there is an unconditional jump where
3626 it should try to schedule instructions from the target of the
3627 branch; simplejump_p fails for indirect jumps even if they have
3629 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3630 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3632 /* ??? We would like this to have the scope of the jump, but that
3633 scope will change when a delay slot insn of an inner scope is added.
3634 Hence, after delay slot scheduling, we'll have to expect
3635 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3638 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3639 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3642 else if (need_block)
3643 /* We can't use JUMP_LABEL here because it might be undefined
3644 when not optimizing. */
3645 return emit_insn_before (gen_block_branch_redirect
3646 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3651 #define CONDJUMP_MIN -252
3652 #define CONDJUMP_MAX 262
3655 /* A label (to be placed) in front of the jump
3656 that jumps to our ultimate destination. */
3658 /* Where we are going to insert it if we cannot move the jump any farther,
3659 or the jump itself if we have picked up an existing jump. */
3661 /* The ultimate destination. */
3663 struct far_branch *prev;
3664 /* If the branch has already been created, its address;
3665 else the address of its first prospective user. */
3669 static void gen_far_branch (struct far_branch *);
/* Current phase of machine-dependent reorg, consulted e.g. by
   barrier_align.  */
3670 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Materialize the far branch described by BP: invert the conditional
   jump at bp->insert_place to skip over an unconditional jump (or
   return) to the far destination.  */
3672 gen_far_branch (struct far_branch *bp)
3674 rtx insn = bp->insert_place;
3676 rtx label = gen_label_rtx ();
3678 emit_label_after (label, insn);
/* A null far_label means this is really a return, not a jump.  */
3681 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3682 LABEL_NUSES (bp->far_label)++;
3685 jump = emit_jump_insn_after (gen_return (), insn);
3686 /* Emit a barrier so that reorg knows that any following instructions
3687 are not reachable via a fall-through path.
3688 But don't do this when not optimizing, since we wouldn't suppress the
3689 alignment for the barrier then, and could end up with out-of-range
3690 pc-relative loads. */
3692 emit_barrier_after (jump);
3693 emit_label_after (bp->near_label, insn);
3694 JUMP_LABEL (jump) = bp->far_label;
3695 if (! invert_jump (insn, label, 1))
3697 /* If we are branching around a jump (rather than a return), prevent
3698 reorg from using an insn from the jump target as the delay slot insn -
3699 when reorg did this, it pessimized code (we rather hide the delay slot)
3700 and it could cause branches to go out of range. */
3703 (gen_stuff_delay_slot
3704 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3705 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3707 /* Prevent reorg from undoing our splits. */
3708 gen_block_redirect (jump, bp->address += 2, 2);
3711 /* Fix up ADDR_DIFF_VECs. */
/* For every ADDR_DIFF_VEC jump table starting at FIRST, emit the braf
   reference label right after the matching casesi_jump_2 insn and make
   the table's offsets relative to that label.  */
3713 fixup_addr_diff_vecs (rtx first)
3717 for (insn = first; insn; insn = NEXT_INSN (insn))
3719 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3721 if (GET_CODE (insn) != JUMP_INSN
3722 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3724 pat = PATTERN (insn);
/* vec_lab is the label the table's offsets are currently relative to.  */
3725 vec_lab = XEXP (XEXP (pat, 0), 0);
3727 /* Search the matching casesi_jump_2. */
3728 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3730 if (GET_CODE (prev) != JUMP_INSN)
3732 prevpat = PATTERN (prev);
3733 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3735 x = XVECEXP (prevpat, 0, 1);
3736 if (GET_CODE (x) != USE)
/* Match on the (use (label_ref vec_lab)) operand.  */
3739 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3743 /* Emit the reference label of the braf where it belongs, right after
3744 the casesi_jump_2 (i.e. braf). */
3745 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3746 emit_label_after (braf_label, prev);
3748 /* Fix up the ADDR_DIF_VEC to be relative
3749 to the reference address of the braf. */
3750 XEXP (XEXP (pat, 0), 0) = braf_label;
3754 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3755 a barrier. Return the base 2 logarithm of the desired alignment. */
/* NOTE(review): this extract is non-contiguous (the embedded original line
   numbers skip); the return type, several braces and some conditions are
   elided.  Verify any change against the complete source file.  */
3757 barrier_align (rtx barrier_or_label)
3759 rtx next = next_real_insn (barrier_or_label), pat, prev;
3760 int slot, credit, jump_to_next = 0;
3765 pat = PATTERN (next);
/* A jump table follows the barrier: presumably no alignment is wanted here
   (the return value for this branch is elided) -- confirm in full source.  */
3767 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3770 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3771 /* This is a barrier in front of a constant table. */
3774 prev = prev_real_insn (barrier_or_label);
3775 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3777 pat = PATTERN (prev);
3778 /* If this is a very small table, we want to keep the alignment after
3779 the table to the minimum for proper code alignment. */
3780 return ((TARGET_SMALLCODE
3781 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3782 <= (unsigned) 1 << (CACHE_LOG - 2)))
3783 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3786 if (TARGET_SMALLCODE)
/* The cache-credit heuristic below needs accurate insn lengths, which are
   only available on SH2+ when optimizing.  */
3789 if (! TARGET_SH2 || ! optimize)
3790 return align_jumps_log;
3792 /* When fixing up pcloads, a constant table might be inserted just before
3793 the basic block that ends with the barrier. Thus, we can't trust the
3794 instruction lengths before that. */
3795 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3797 /* Check if there is an immediately preceding branch to the insn beyond
3798 the barrier. We must weight the cost of discarding useful information
3799 from the current cache line when executing this branch and there is
3800 an alignment, against that of fetching unneeded insn in front of the
3801 branch target when there is no alignment. */
3803 /* There are two delay_slot cases to consider. One is the simple case
3804 where the preceding branch is to the insn beyond the barrier (simple
3805 delay slot filling), and the other is where the preceding branch has
3806 a delay slot that is a duplicate of the insn after the barrier
3807 (fill_eager_delay_slots) and the branch is to the insn after the insn
3808 after the barrier. */
3810 /* PREV is presumed to be the JUMP_INSN for the barrier under
3811 investigation. Skip to the insn before it. */
3812 prev = prev_real_insn (prev);
/* Walk backwards, spending a "credit" of roughly a quarter cache line on
   the insns preceding the barrier.  */
3814 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3815 credit >= 0 && prev && GET_CODE (prev) == INSN;
3816 prev = prev_real_insn (prev))
3819 if (GET_CODE (PATTERN (prev)) == USE
3820 || GET_CODE (PATTERN (prev)) == CLOBBER)
3822 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3824 prev = XVECEXP (PATTERN (prev), 0, 1);
3825 if (INSN_UID (prev) == INSN_UID (next))
3827 /* Delay slot was filled with insn at jump target. */
3834 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3836 credit -= get_attr_length (prev);
3839 && GET_CODE (prev) == JUMP_INSN
3840 && JUMP_LABEL (prev))
3844 || next_real_insn (JUMP_LABEL (prev)) == next
3845 /* If relax_delay_slots() decides NEXT was redundant
3846 with some previous instruction, it will have
3847 redirected PREV's jump to the following insn. */
3848 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3849 /* There is no upper bound on redundant instructions
3850 that might have been skipped, but we must not put an
3851 alignment where none had been before. */
3852 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3854 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3855 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3856 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3858 rtx pat = PATTERN (prev);
3859 if (GET_CODE (pat) == PARALLEL)
3860 pat = XVECEXP (pat, 0, 0);
/* Unconditional jumps (SET_SRC == PC) are cheaper to skip past.  */
3861 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3867 return align_jumps_log;
3870 /* If we are inside a phony loop, almost any kind of label can turn up as the
3871 first one in the loop. Aligning a braf label causes incorrect switch
3872 destination addresses; we can detect braf labels because they are
3873 followed by a BARRIER.
3874 Applying loop alignment to small constant or switch tables is a waste
3875 of space, so we suppress this too. */
/* NOTE(review): the return type, the declaration of NEXT, and some of the
   suppression conditions are elided from this extract; confirm against the
   complete source.  */
3877 sh_loop_align (rtx label)
/* Skip past any code labels that immediately follow LABEL.  */
3882 next = next_nonnote_insn (next);
3883 while (next && GET_CODE (next) == CODE_LABEL);
/* Suppress alignment for jump tables and constant-table entries (the
   leading condition of this disjunction is elided).  */
3887 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3888 || recog_memoized (next) == CODE_FOR_consttable_2)
3891 return align_loops_log;
3894 /* Do a final pass over the function, just before delayed branch
/* NOTE(review): the rest of this header comment and the function signature
   are elided from this extract (this is presumably the machine-dependent
   reorg pass, sh_reorg / machine_dependent_reorg -- confirm in full source).
   The extract is non-contiguous throughout; many braces, conditions and
   statements are missing.  Do not edit without the complete file.  */
3900 rtx first, insn, mova = NULL_RTX;
3902 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3903 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3905 first = get_insns ();
3907 /* We must split call insns before introducing `mova's. If we're
3908 optimizing, they'll have already been split. Otherwise, make
3909 sure we don't split them too late. */
3911 split_all_insns_noflow ();
3916 /* If relaxing, generate pseudo-ops to associate function calls with
3917 the symbols they call. It does no harm to not generate these
3918 pseudo-ops. However, when we can generate them, it enables to
3919 linker to potentially relax the jsr to a bsr, and eliminate the
3920 register load and, possibly, the constant pool entry. */
3922 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3925 /* Remove all REG_LABEL notes. We want to use them for our own
3926 purposes. This works because none of the remaining passes
3927 need to look at them.
3929 ??? But it may break in the future. We should use a machine
3930 dependent REG_NOTE, or some other approach entirely. */
3931 for (insn = first; insn; insn = NEXT_INSN (insn))
3937 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3938 remove_note (insn, note);
/* Pass 1: associate each register-indirect call with the insn that loads
   the call target, via a shared label in REG_LABEL notes.  */
3942 for (insn = first; insn; insn = NEXT_INSN (insn))
3944 rtx pattern, reg, link, set, scan, dies, label;
3945 int rescan = 0, foundinsn = 0;
3947 if (GET_CODE (insn) == CALL_INSN)
3949 pattern = PATTERN (insn);
3951 if (GET_CODE (pattern) == PARALLEL)
3952 pattern = XVECEXP (pattern, 0, 0);
3953 if (GET_CODE (pattern) == SET)
3954 pattern = SET_SRC (pattern);
3956 if (GET_CODE (pattern) != CALL
3957 || GET_CODE (XEXP (pattern, 0)) != MEM)
3960 reg = XEXP (XEXP (pattern, 0), 0);
/* Not a CALL_INSN: check for a special-function (sfunc) use of a reg.  */
3964 reg = sfunc_uses_reg (insn);
3969 if (GET_CODE (reg) != REG)
3972 /* This is a function call via REG. If the only uses of REG
3973 between the time that it is set and the time that it dies
3974 are in function calls, then we can associate all the
3975 function calls with the setting of REG. */
3977 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3979 if (REG_NOTE_KIND (link) != 0)
3981 set = single_set (XEXP (link, 0));
3982 if (set && rtx_equal_p (reg, SET_DEST (set)))
3984 link = XEXP (link, 0);
3991 /* ??? Sometimes global register allocation will have
3992 deleted the insn pointed to by LOG_LINKS. Try
3993 scanning backward to find where the register is set. */
3994 for (scan = PREV_INSN (insn);
3995 scan && GET_CODE (scan) != CODE_LABEL;
3996 scan = PREV_INSN (scan))
3998 if (! INSN_P (scan))
4001 if (! reg_mentioned_p (reg, scan))
4004 if (noncall_uses_reg (reg, scan, &set))
4018 /* The register is set at LINK. */
4020 /* We can only optimize the function call if the register is
4021 being set to a symbol. In theory, we could sometimes
4022 optimize calls to a constant location, but the assembler
4023 and linker do not support that at present. */
4024 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4025 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4028 /* Scan forward from LINK to the place where REG dies, and
4029 make sure that the only insns which use REG are
4030 themselves function calls. */
4032 /* ??? This doesn't work for call targets that were allocated
4033 by reload, since there may not be a REG_DEAD note for the
4037 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4041 /* Don't try to trace forward past a CODE_LABEL if we haven't
4042 seen INSN yet. Ordinarily, we will only find the setting insn
4043 in LOG_LINKS if it is in the same basic block. However,
4044 cross-jumping can insert code labels in between the load and
4045 the call, and can result in situations where a single call
4046 insn may have two targets depending on where we came from. */
4048 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4051 if (! INSN_P (scan))
4054 /* Don't try to trace forward past a JUMP. To optimize
4055 safely, we would have to check that all the
4056 instructions at the jump destination did not use REG. */
4058 if (GET_CODE (scan) == JUMP_INSN)
4061 if (! reg_mentioned_p (reg, scan))
4064 if (noncall_uses_reg (reg, scan, &scanset))
4071 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4073 /* There is a function call to this register other
4074 than the one we are checking. If we optimize
4075 this call, we need to rescan again below. */
4079 /* ??? We shouldn't have to worry about SCANSET here.
4080 We should just be able to check for a REG_DEAD note
4081 on a function call. However, the REG_DEAD notes are
4082 apparently not dependable around libcalls; c-torture
4083 execute/920501-2 is a test case. If SCANSET is set,
4084 then this insn sets the register, so it must have
4085 died earlier. Unfortunately, this will only handle
4086 the cases in which the register is, in fact, set in a
4089 /* ??? We shouldn't have to use FOUNDINSN here.
4090 However, the LOG_LINKS fields are apparently not
4091 entirely reliable around libcalls;
4092 newlib/libm/math/e_pow.c is a test case. Sometimes
4093 an insn will appear in LOG_LINKS even though it is
4094 not the most recent insn which sets the register. */
4098 || find_reg_note (scan, REG_DEAD, reg)))
4107 /* Either there was a branch, or some insn used REG
4108 other than as a function call address. */
4112 /* Create a code label, and put it in a REG_LABEL note on
4113 the insn which sets the register, and on each call insn
4114 which uses the register. In final_prescan_insn we look
4115 for the REG_LABEL notes, and output the appropriate label
4118 label = gen_label_rtx ();
4119 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4121 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4130 scan = NEXT_INSN (scan);
4132 && ((GET_CODE (scan) == CALL_INSN
4133 && reg_mentioned_p (reg, scan))
4134 || ((reg2 = sfunc_uses_reg (scan))
4135 && REGNO (reg2) == REGNO (reg))))
4137 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4139 while (scan != dies);
4145 fixup_addr_diff_vecs (first);
4149 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4150 shorten_branches (first);
4152 /* Scan the function looking for move instructions which have to be
4153 changed to pc-relative loads and insert the literal tables. */
4155 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4156 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4160 /* ??? basic block reordering can move a switch table dispatch
4161 below the switch table. Check if that has happened.
4162 We only have the addresses available when optimizing; but then,
4163 this check shouldn't be needed when not optimizing. */
4164 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4166 && (INSN_ADDRESSES (INSN_UID (insn))
4167 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4169 /* Change the mova into a load.
4170 broken_move will then return true for it. */
4173 else if (! num_mova++)
4176 else if (GET_CODE (insn) == JUMP_INSN
4177 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4185 /* Some code might have been inserted between the mova and
4186 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4187 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4188 total += get_attr_length (scan);
4190 /* range of mova is 1020, add 4 because pc counts from address of
4191 second instruction after this one, subtract 2 in case pc is 2
4192 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4193 cancels out with alignment effects of the mova itself. */
4196 /* Change the mova into a load, and restart scanning
4197 there. broken_move will then return true for mova. */
4202 if (broken_move (insn)
4203 || (GET_CODE (insn) == INSN
4204 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4207 /* Scan ahead looking for a barrier to stick the constant table
4209 rtx barrier = find_barrier (num_mova, mova, insn);
4210 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4211 int need_aligned_label = 0;
4213 if (num_mova && ! mova_p (mova))
4215 /* find_barrier had to change the first mova into a
4216 pcload; thus, we have to start with this new pcload. */
4220 /* Now find all the moves between the points and modify them. */
4221 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4223 if (GET_CODE (scan) == CODE_LABEL)
4225 if (GET_CODE (scan) == INSN
4226 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4227 need_aligned_label = 1;
4228 if (broken_move (scan))
4230 rtx *patp = &PATTERN (scan), pat = *patp;
4234 enum machine_mode mode;
4236 if (GET_CODE (pat) == PARALLEL)
4237 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4238 src = SET_SRC (pat);
4239 dst = SET_DEST (pat);
4240 mode = GET_MODE (dst);
4242 if (mode == SImode && hi_const (src)
4243 && REGNO (dst) != FPUL_REG)
/* Normalize a SUBREG destination down to the underlying hard reg.  */
4248 while (GET_CODE (dst) == SUBREG)
4250 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4251 GET_MODE (SUBREG_REG (dst)),
4254 dst = SUBREG_REG (dst);
4256 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4258 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4260 /* This must be an insn that clobbers r0. */
4261 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4262 XVECLEN (PATTERN (scan), 0)
4264 rtx clobber = *clobberp;
4266 if (GET_CODE (clobber) != CLOBBER
4267 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4271 && reg_set_between_p (r0_rtx, last_float_move, scan))
4275 && GET_MODE_SIZE (mode) != 4
4276 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4278 lab = add_constant (src, mode, last_float);
4280 emit_insn_before (gen_mova (lab), scan);
4283 /* There will be a REG_UNUSED note for r0 on
4284 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4285 lest reorg:mark_target_live_regs will not
4286 consider r0 to be used, and we end up with delay
4287 slot insn in front of SCAN that clobbers r0. */
4289 = find_regno_note (last_float_move, REG_UNUSED, 0);
4291 /* If we are not optimizing, then there may not be
4294 PUT_MODE (note, REG_INC);
4296 *last_float_addr = r0_inc_rtx;
4298 last_float_move = scan;
4300 newsrc = gen_rtx_MEM (mode,
4301 (((TARGET_SH4 && ! TARGET_FMOVD)
4302 || REGNO (dst) == FPUL_REG)
4305 last_float_addr = &XEXP (newsrc, 0);
4307 /* Remove the clobber of r0. */
4308 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4309 gen_rtx_SCRATCH (Pmode));
4310 RTX_UNCHANGING_P (newsrc) = 1;
4312 /* This is a mova needing a label. Create it. */
4313 else if (GET_CODE (src) == UNSPEC
4314 && XINT (src, 1) == UNSPEC_MOVA
4315 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4317 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4318 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4319 newsrc = gen_rtx_UNSPEC (SImode,
4320 gen_rtvec (1, newsrc),
/* Ordinary broken move: load the constant from the pool via a label.  */
4325 lab = add_constant (src, mode, 0);
4326 newsrc = gen_rtx_MEM (mode,
4327 gen_rtx_LABEL_REF (VOIDmode, lab));
4328 RTX_UNCHANGING_P (newsrc) = 1;
4330 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4331 INSN_CODE (scan) = -1;
4334 dump_table (need_aligned_label ? insn : 0, barrier);
4339 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4340 INSN_ADDRESSES_FREE ();
4341 split_branches (first);
4343 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4344 also has an effect on the register that holds the address of the sfunc.
4345 Insert an extra dummy insn in front of each sfunc that pretends to
4346 use this register. */
4347 if (flag_delayed_branch)
4349 for (insn = first; insn; insn = NEXT_INSN (insn))
4351 rtx reg = sfunc_uses_reg (insn);
4355 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4359 /* fpscr is not actually a user variable, but we pretend it is for the
4360 sake of the previous optimization passes, since we want it handled like
4361 one. However, we don't have any debugging information for it, so turn
4362 it into a non-user variable now. */
4364 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4366 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the UID of the real insn that LABEL ultimately branches to,
   skipping newly-created insns whose UID is >= MAX_UID (those cannot be
   used to index the branch arrays).  NOTE(review): the return type and the
   undefined-label return value are elided from this extract -- confirm in
   the complete source.  */
4370 get_dest_uid (rtx label, int max_uid)
4372 rtx dest = next_real_insn (label);
4375 /* This can happen for an undefined label. */
4377 dest_uid = INSN_UID (dest);
4378 /* If this is a newly created branch redirection blocking instruction,
4379 we cannot index the branch_uid or insn_addresses arrays with its
4380 uid. But then, we won't need to, because the actual destination is
4381 the following branch. */
4382 while (dest_uid >= max_uid)
4384 dest = NEXT_INSN (dest);
4385 dest_uid = INSN_UID (dest);
/* A RETURN pattern has no meaningful destination label; the value returned
   for this case is elided from the extract.  */
4387 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4392 /* Split condbranches that are out of range. Also add clobbers for
4393 scratch registers that are needed in far jumps.
4394 We do this before delay slot scheduling, so that it can take our
4395 newly created instructions into account. It also allows us to
4396 find branches with common targets more easily. */
/* NOTE(review): this extract is non-contiguous; the return type, several
   braces, declarations (e.g. LABEL, FAR_LABEL, DEST_UID in places) and
   conditions are elided.  Verify any change against the complete source.  */
4399 split_branches (rtx first)
4402 struct far_branch **uid_branch, *far_branch_list = 0;
4403 int max_uid = get_max_uid ();
4405 /* Find out which branches are out of range. */
4406 shorten_branches (first);
/* Per-UID table mapping branch destinations to far_branch records.  */
4408 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4409 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4411 for (insn = first; insn; insn = NEXT_INSN (insn))
4412 if (! INSN_P (insn))
4414 else if (INSN_DELETED_P (insn))
4416 /* Shorten_branches would split this instruction again,
4417 so transform it into a note. */
4418 PUT_CODE (insn, NOTE);
4419 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4420 NOTE_SOURCE_FILE (insn) = 0;
4422 else if (GET_CODE (insn) == JUMP_INSN
4423 /* Don't mess with ADDR_DIFF_VEC */
4424 && (GET_CODE (PATTERN (insn)) == SET
4425 || GET_CODE (PATTERN (insn)) == RETURN))
4427 enum attr_type type = get_attr_type (insn);
4428 if (type == TYPE_CBRANCH)
/* Conditional branch longer than 4 bytes: out of the short cbranch
   range, so it must be rewritten via a near label / far branch.  */
4432 if (get_attr_length (insn) > 4)
4434 rtx src = SET_SRC (PATTERN (insn));
4435 rtx olabel = XEXP (XEXP (src, 1), 0);
4436 int addr = INSN_ADDRESSES (INSN_UID (insn));
4438 int dest_uid = get_dest_uid (olabel, max_uid);
4439 struct far_branch *bp = uid_branch[dest_uid];
4441 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4442 the label if the LABEL_NUSES count drops to zero. There is
4443 always a jump_optimize pass that sets these values, but it
4444 proceeds to delete unreferenced code, and then if not
4445 optimizing, to un-delete the deleted instructions, thus
4446 leaving labels with too low uses counts. */
4449 JUMP_LABEL (insn) = olabel;
4450 LABEL_NUSES (olabel)++;
/* No record yet for this destination: allocate and chain one.  */
4454 bp = (struct far_branch *) alloca (sizeof *bp);
4455 uid_branch[dest_uid] = bp;
4456 bp->prev = far_branch_list;
4457 far_branch_list = bp;
4459 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4460 LABEL_NUSES (bp->far_label)++;
4464 label = bp->near_label;
4465 if (! label && bp->address - addr >= CONDJUMP_MIN)
4467 rtx block = bp->insert_place;
4469 if (GET_CODE (PATTERN (block)) == RETURN)
4470 block = PREV_INSN (block);
4472 block = gen_block_redirect (block,
4474 label = emit_label_after (gen_label_rtx (),
4476 bp->near_label = label;
4478 else if (label && ! NEXT_INSN (label))
4480 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4481 bp->insert_place = insn;
4483 gen_far_branch (bp);
4487 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4489 bp->near_label = label = gen_label_rtx ();
4490 bp->insert_place = insn;
4493 if (! redirect_jump (insn, label, 1))
4498 /* get_attr_length (insn) == 2 */
4499 /* Check if we have a pattern where reorg wants to redirect
4500 the branch to a label from an unconditional branch that
4502 /* We can't use JUMP_LABEL here because it might be undefined
4503 when not optimizing. */
4504 /* A syntax error might cause beyond to be NULL_RTX. */
4506 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4510 && (GET_CODE (beyond) == JUMP_INSN
4511 || ((beyond = next_active_insn (beyond))
4512 && GET_CODE (beyond) == JUMP_INSN))
4513 && GET_CODE (PATTERN (beyond)) == SET
4514 && recog_memoized (beyond) == CODE_FOR_jump_compact
4516 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4517 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4519 gen_block_redirect (beyond,
4520 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4523 next = next_active_insn (insn);
4525 if ((GET_CODE (next) == JUMP_INSN
4526 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4527 && GET_CODE (PATTERN (next)) == SET
4528 && recog_memoized (next) == CODE_FOR_jump_compact
4530 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4531 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4533 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4535 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4537 int addr = INSN_ADDRESSES (INSN_UID (insn));
4540 struct far_branch *bp;
4542 if (type == TYPE_JUMP)
4544 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4545 dest_uid = get_dest_uid (far_label, max_uid);
4548 /* Parse errors can lead to labels outside
4550 if (! NEXT_INSN (far_label))
4555 JUMP_LABEL (insn) = far_label;
4556 LABEL_NUSES (far_label)++;
4558 redirect_jump (insn, NULL_RTX, 1);
4562 bp = uid_branch[dest_uid];
4565 bp = (struct far_branch *) alloca (sizeof *bp);
4566 uid_branch[dest_uid] = bp;
4567 bp->prev = far_branch_list;
4568 far_branch_list = bp;
4570 bp->far_label = far_label;
4572 LABEL_NUSES (far_label)++;
4574 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4575 if (addr - bp->address <= CONDJUMP_MAX)
4576 emit_label_after (bp->near_label, PREV_INSN (insn));
4579 gen_far_branch (bp);
4585 bp->insert_place = insn;
4587 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4589 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4592 /* Generate all pending far branches,
4593 and free our references to the far labels. */
4594 while (far_branch_list)
4596 if (far_branch_list->near_label
4597 && ! NEXT_INSN (far_branch_list->near_label))
4598 gen_far_branch (far_branch_list);
4600 && far_branch_list->far_label
4601 && ! --LABEL_NUSES (far_branch_list->far_label))
4602 delete_insn (far_branch_list->far_label);
4603 far_branch_list = far_branch_list->prev;
4606 /* Instruction length information is no longer valid due to the new
4607 instructions that have been generated. */
4608 init_insn_lengths ();
4611 /* Dump out instruction addresses, which is useful for debugging the
4612 constant pool table stuff.
4614 If relaxing, output the label and pseudo-ops used to link together
4615 calls and the instruction which set the registers. */
4617 /* ??? The addresses printed by this routine for insns are nonsense for
4618 insns which are inside of a sequence where none of the inner insns have
4619 variable length. This is because the second pass of shorten_branches
4620 does not bother to update them. */
/* NOTE(review): the return type, some braces, and the declarations of NOTE
   and PATTERN are elided from this extract; confirm against full source.  */
4623 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4624 int noperands ATTRIBUTE_UNUSED)
4626 if (TARGET_DUMPISIZE)
4627 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* Relaxation support: REG_LABEL notes were planted by the reorg pass to
   pair call insns with the insn that loads the call target.  */
4633 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4638 pattern = PATTERN (insn);
4639 if (GET_CODE (pattern) == PARALLEL)
4640 pattern = XVECEXP (pattern, 0, 0);
4641 if (GET_CODE (pattern) == CALL
4642 || (GET_CODE (pattern) == SET
4643 && (GET_CODE (SET_SRC (pattern)) == CALL
4644 || get_attr_type (insn) == TYPE_SFUNC)))
4645 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4646 CODE_LABEL_NUMBER (XEXP (note, 0)));
4647 else if (GET_CODE (pattern) == SET)
4648 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4649 CODE_LABEL_NUMBER (XEXP (note, 0)));
4656 /* Dump out any constants accumulated in the final pass. These will
/* Emit the accumulated constant-pool entries (labels plus .long values)
   collected in pool_vector during final.  NOTE(review): the return type
   and the declaration of I are elided from this extract.  */
4660 output_jump_label_table (void)
4666 fprintf (asm_out_file, "\t.align 2\n");
4667 for (i = 0; i < pool_size; i++)
4669 pool_node *p = &pool_vector[i];
4671 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4672 CODE_LABEL_NUMBER (p->label));
4673 output_asm_insn (".long %O0", &p->value);
4681 /* A full frame looks like:
4685 [ if current_function_anonymous_args
4698 local-0 <- fp points here. */
4700 /* Number of bytes pushed for anonymous args, used to pass information
4701 between expand_prologue and expand_epilogue. */
4703 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4704 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4705 for an epilogue and a negative value means that it's for a sibcall
4706 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4707 all the registers that are about to be restored, and hence dead. */
/* NOTE(review): this extract is non-contiguous; braces, some conditions and
   a few declarations (e.g. CONST_REG, INSN, X) are elided.  Verify against
   the complete source before modifying.  */
4710 output_stack_adjust (int size, rtx reg, int epilogue_p,
4711 HARD_REG_SET *live_regs_mask)
/* Prologue adjustments go through frame_insn so they are marked
   RTX_FRAME_RELATED for unwind info; epilogue ones use plain emit_insn.  */
4713 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4716 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* Small adjustment: a single immediate add suffices.  */
4721 if (CONST_OK_FOR_ADD (size))
4722 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4723 /* Try to do it with two partial adjustments; however, we must make
4724 sure that the stack is properly aligned at all times, in case
4725 an interrupt occurs between the two partial adjustments. */
4726 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4727 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4729 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4730 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
/* Large adjustment: the constant must be materialized in a temp reg.  */
4736 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4739 /* If TEMP is invalid, we could temporarily save a general
4740 register to MACL. However, there is currently no need
4741 to handle this case, so just abort when we see it. */
4743 || current_function_interrupt
4744 || ! call_used_regs[temp] || fixed_regs[temp])
4746 if (temp < 0 && ! current_function_interrupt
4747 && (TARGET_SHMEDIA || epilogue_p >= 0))
/* Scavenge a call-clobbered, non-fixed register for the temp.  */
4750 COPY_HARD_REG_SET (temps, call_used_reg_set);
4751 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4755 if (current_function_return_rtx)
4757 enum machine_mode mode;
4758 mode = GET_MODE (current_function_return_rtx);
4759 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4760 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
/* Exclude the return-value registers from the candidate set.  */
4762 for (i = 0; i < nreg; i++)
4763 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4764 if (current_function_calls_eh_return)
4766 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4767 for (i = 0; i <= 3; i++)
4768 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4771 if (TARGET_SHMEDIA && epilogue_p < 0)
4772 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4773 CLEAR_HARD_REG_BIT (temps, i);
4774 if (epilogue_p <= 0)
4776 for (i = FIRST_PARM_REG;
4777 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4778 CLEAR_HARD_REG_BIT (temps, i);
4779 if (cfun->static_chain_decl != NULL)
4780 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4782 temp = scavenge_reg (&temps);
4784 if (temp < 0 && live_regs_mask)
4785 temp = scavenge_reg (live_regs_mask);
4788 /* If we reached here, the most likely case is the (sibcall)
4789 epilogue for non SHmedia. Put a special push/pop sequence
4790 for such case as the last resort. This looks lengthy but
4791 would not be problem because it seems to be very rare. */
4792 if (! TARGET_SHMEDIA && epilogue_p)
4794 rtx adj_reg, tmp_reg, mem;
4796 /* ??? There is still the slight possibility that r4 or r5
4797 have been reserved as fixed registers or assigned as
4798 global registers, and they change during an interrupt.
4799 There are possible ways to handle this:
4800 - If we are adjusting the frame pointer (r14), we can do
4801 with a single temp register and an ordinary push / pop
4803 - Grab any call-used or call-saved registers (i.e. not
4804 fixed or globals) for the temps we need. We might
4805 also grab r14 if we are adjusting the stack pointer.
4806 If we can't find enough available registers, issue
4807 a diagnostic and abort - the user must have reserved
4808 way too many registers.
4809 But since all this is rather unlikely to happen and
4810 would require extra testing, we just abort if r4 / r5
4811 are not available. */
4812 if (fixed_regs[4] || fixed_regs[5]
4813 || global_regs[4] || global_regs[5])
/* Borrow r4/r5 around the adjustment, saving/restoring them on the
   stack itself via pre-dec / post-inc addressing.  */
4816 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4817 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4818 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4819 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4820 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4821 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4822 emit_move_insn (mem, tmp_reg);
4823 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4824 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4825 emit_move_insn (mem, tmp_reg);
4826 emit_move_insn (reg, adj_reg);
4827 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4828 emit_move_insn (adj_reg, mem);
4829 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4830 emit_move_insn (tmp_reg, mem);
4836 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4838 /* If SIZE is negative, subtract the positive value.
4839 This sometimes allows a constant pool entry to be shared
4840 between prologue and epilogue code. */
4843 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4844 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4848 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4849 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
/* Attach a REG_FRAME_RELATED_EXPR so the unwinder sees a simple
   reg = reg + size adjustment rather than the temp-reg sequence.  */
4853 = (gen_rtx_EXPR_LIST
4854 (REG_FRAME_RELATED_EXPR,
4855 gen_rtx_SET (VOIDmode, reg,
4856 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4866 RTX_FRAME_RELATED_P (x) = 1;
4870 /* Output RTL to push register RN onto the stack. */
/* NOTE(review): the function signature and the leading special cases (and
   the emit of X) are elided from this extract; confirm in full source.  */
4877 x = gen_push_fpul ();
4878 else if (rn == FPSCR_REG)
4879 x = gen_push_fpscr ();
/* Double-precision FP (or XD) registers are pushed 8 bytes at a time when
   FMOVD is available; only even-numbered FP regs start a DF pair.  */
4880 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4881 && FP_OR_XD_REGISTER_P (rn))
4883 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4885 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4887 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4888 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4890 x = gen_push (gen_rtx_REG (SImode, rn));
/* Record the stack-pointer auto-modification for reorg/liveness.  */
4894 = gen_rtx_EXPR_LIST (REG_INC,
4895 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4899 /* Output RTL to pop register RN from the stack. */
/* NOTE(review): mirror image of push() above; the function signature and
   leading special cases are elided from this extract.  */
4906 x = gen_pop_fpul ();
4907 else if (rn == FPSCR_REG)
4908 x = gen_pop_fpscr ();
4909 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4910 && FP_OR_XD_REGISTER_P (rn))
4912 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4914 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4916 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4917 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4919 x = gen_pop (gen_rtx_REG (SImode, rn));
/* Record the stack-pointer auto-modification for reorg/liveness.  */
4923 = gen_rtx_EXPR_LIST (REG_INC,
4924 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4927 /* Generate code to push the regs specified in the mask. */
/* NOTE(review): this extract is non-contiguous; the return type, loop
   braces, and the declarations of I and SKIP_FPSCR are elided.  The only
   code change here is repairing the mis-encoded '&reg_' (it appeared as
   the single character U+00AE followed by '_').  */
4930 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4935 /* Push PR last; this gives better latencies after the prologue, and
4936 candidates for the return delay slot when there are no general
4937 registers pushed. */
4938 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4940 /* If this is an interrupt handler, and the SZ bit varies,
4941 and we have to push any floating point register, we need
4942 to switch to the correct precision first. */
4943 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4944 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4946 HARD_REG_SET unsaved;
4949 COMPL_HARD_REG_SET (unsaved, *mask);
4950 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4954 && (i != FPSCR_REG || ! skip_fpscr)
4955 && TEST_HARD_REG_BIT (*mask, i))
/* PR is deliberately pushed after the loop (see comment above).  */
4958 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4962 /* Calculate how much extra space is needed to save all callee-saved
4964 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* Sum, in bytes, the natural-mode sizes of SHmedia target registers that
   are callee-saved (or any register in an interrupt handler) and not
   already live.  NOTE(review): return type and return statement are elided
   from this extract.  */
4967 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4970 int stack_space = 0;
4971 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4973 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4974 if ((! call_used_regs[reg] || interrupt_handler)
4975 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4976 /* Leave space to save this target register on the stack,
4977 in case target register allocation wants to use it. */
4978 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4982 /* Decide whether we should reserve space for callee-save target registers,
4983 in case target register allocation wants to use them. REGS_SAVED is
4984 the space, in bytes, that is already required for register saves.
4985 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* NOTE(review): the return type and a guard condition that precedes the
   visible return are elided from this extract.  */
4988 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4989 HARD_REG_SET *live_regs_mask)
/* Reserve only when the extra target-reg space does not dominate the
   save-area size already being paid for.  */
4993 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4996 /* Decide how much space to reserve for callee-save target registers
4997 in case target register allocation wants to use them.
4998 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* NOTE(review): the return type and the else-branch (presumably returning
   0) are elided from this extract.  */
5001 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5003 if (shmedia_space_reserved_for_target_registers)
5004 return shmedia_target_regs_stack_space (live_regs_mask);
5009 /* Work out the registers which need to be saved, both as a mask and a
5010 count of saved words. Return the count.
5012 If doing a pragma interrupt function, then push all regs used by the
5013 function, and if we call another function (we can tell by looking at PR),
5014 make sure that all the regs it clobbers are safe too. */
5017 calc_live_regs (HARD_REG_SET *live_regs_mask)
5021 int interrupt_handler;
5022 int pr_live, has_call;
5024 interrupt_handler = sh_cfun_interrupt_handler_p ();
5026 CLEAR_HARD_REG_SET (*live_regs_mask);
5027 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
5028 && regs_ever_live[FPSCR_REG])
5029 target_flags &= ~FPU_SINGLE_BIT;
5030 /* If we can save a lot of saves by switching to double mode, do that. */
5031 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
5032 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5033 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5034 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5037 target_flags &= ~FPU_SINGLE_BIT;
5040 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5041 knows how to use it. That means the pseudo originally allocated for
5042 the initial value can become the PR_MEDIA_REG hard register, as seen for
5043 execute/20010122-1.c:test9. */
5045 /* ??? this function is called from initial_elimination_offset, hence we
5046 can't use the result of sh_media_register_for_return here. */
5047 pr_live = sh_pr_n_sets ();
5050 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5051 pr_live = (pr_initial
5052 ? (GET_CODE (pr_initial) != REG
5053 || REGNO (pr_initial) != (PR_REG))
5054 : regs_ever_live[PR_REG]);
5055 /* For Shcompact, if not optimizing, we end up with a memory reference
5056 using the return address pointer for __builtin_return_address even
5057 though there is no actual need to put the PR register on the stack. */
5058 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5060 /* Force PR to be live if the prologue has to call the SHmedia
5061 argument decoder or register saver. */
5062 if (TARGET_SHCOMPACT
5063 && ((current_function_args_info.call_cookie
5064 & ~ CALL_COOKIE_RET_TRAMP (1))
5065 || current_function_has_nonlocal_label))
5067 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5068 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5070 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5072 : (interrupt_handler && ! pragma_trapa)
5073 ? (/* Need to save all the regs ever live. */
5074 (regs_ever_live[reg]
5075 || (call_used_regs[reg]
5076 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5078 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5079 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5080 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5081 && reg != RETURN_ADDRESS_POINTER_REGNUM
5082 && reg != T_REG && reg != GBR_REG
5083 /* Push fpscr only on targets which have FPU */
5084 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5085 : (/* Only push those regs which are used and need to be saved. */
5088 && current_function_args_info.call_cookie
5089 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5090 || (regs_ever_live[reg] && ! call_used_regs[reg])
5091 || (current_function_calls_eh_return
5092 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5093 || reg == (int) EH_RETURN_DATA_REGNO (1)
5094 || reg == (int) EH_RETURN_DATA_REGNO (2)
5095 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5096 || ((reg == MACL_REG || reg == MACH_REG)
5097 && regs_ever_live[reg]
5098 && sh_cfun_attr_renesas_p ())
5101 SET_HARD_REG_BIT (*live_regs_mask, reg);
5102 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5104 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
5105 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5107 if (FP_REGISTER_P (reg))
5109 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5111 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5112 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5115 else if (XD_REGISTER_P (reg))
5117 /* Must switch to double mode to access these registers. */
5118 target_flags &= ~FPU_SINGLE_BIT;
5123 /* If we have a target register optimization pass after prologue / epilogue
5124 threading, we need to assume all target registers will be live even if
5126 if (flag_branch_target_load_optimize2
5127 && TARGET_SAVE_ALL_TARGET_REGS
5128 && shmedia_space_reserved_for_target_registers)
5129 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5130 if ((! call_used_regs[reg] || interrupt_handler)
5131 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5133 SET_HARD_REG_BIT (*live_regs_mask, reg);
5134 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5136 /* If this is an interrupt handler, we don't have any call-clobbered
5137 registers we can conveniently use for target register save/restore.
5138 Make sure we save at least one general purpose register when we need
5139 to save target registers. */
5140 if (interrupt_handler
5141 && hard_regs_intersect_p (live_regs_mask,
5142 ®_class_contents[TARGET_REGS])
5143 && ! hard_regs_intersect_p (live_regs_mask,
5144 ®_class_contents[GENERAL_REGS]))
5146 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5147 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5153 /* Code to generate prologue and epilogue sequences */
5155 /* PUSHED is the number of bytes that are being pushed on the
5156 stack for register saves. Return the frame size, padded
5157 appropriately so that the stack stays properly aligned. */
5158 static HOST_WIDE_INT
5159 rounded_frame_size (int pushed)
5161 HOST_WIDE_INT size = get_frame_size ();
5162 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5164 return ((size + pushed + align - 1) & -align) - pushed;
5167 /* Choose a call-clobbered target-branch register that remains
5168 unchanged along the whole function. We set it up as the return
5169 value in the prologue. */
5171 sh_media_register_for_return (void)
5176 if (! current_function_is_leaf)
5178 if (lookup_attribute ("interrupt_handler",
5179 DECL_ATTRIBUTES (current_function_decl)))
5182 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5184 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5185 if (call_used_regs[regno] && ! regs_ever_live[regno])
/* The maximum registers we need to save are:
   - 62 general purpose registers (r15 is stack pointer, r63 is zero)
   - 32 floating point registers (for each pair, we save none,
     one single precision value, or a double precision value).
   - 8 target registers
   - add 1 entry for a delimiter.  */
#define MAX_SAVED_REGS (62+32+8)

/* One scheduled register save: which hard register, the machine mode it
   is saved in, and its stack offset.  */
typedef struct save_entry_s
{
  int reg;
  int mode;
  int offset;
} save_entry;

/* Maximum number of scratch registers sh5_schedule_saves collects for
   building save/restore addresses.  */
#define MAX_TEMPS 4

/* There will be a delimiter entry with VOIDmode both at the start and the
   end of a filled in schedule.  The end delimiter has the offset of the
   save with the smallest (i.e. most negative) offset.  */
typedef struct save_schedule_s
{
  save_entry entries[MAX_SAVED_REGS + 2];
  int temps[MAX_TEMPS+1];
} save_schedule;
/* sh5_schedule_saves: build the SAVE_SCHEDULE describing at which stack
   offset each live register is saved, and collect scratch registers in
   schedule->temps for addressing the saves.
   NOTE(review): this chunk is a lossy line-numbered listing -- gaps in
   the embedded numbering show that some original lines (braces,
   declarations) are missing.  Code lines below are kept verbatim.  */
5217 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5218 use reverse order. Returns the last entry written to (not counting
5219 the delimiter). OFFSET_BASE is a number to be added to all offset
5223 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5227 save_entry *entry = schedule->entries;
/* Outside interrupt handlers, gather call-clobbered, non-fixed general
   registers as temps -- skipping argument, return, static-chain and EH
   registers, which carry values at function entry/exit.  */
5231 if (! current_function_interrupt)
5232 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5233 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5234 && ! FUNCTION_ARG_REGNO_P (i)
5235 && i != FIRST_RET_REG
5236 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5237 && ! (current_function_calls_eh_return
5238 && (i == EH_RETURN_STACKADJ_REGNO
5239 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5240 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5241 schedule->temps[tmpx++] = i;
/* Leading delimiter entry: VOIDmode, offset = OFFSET_BASE.  */
5243 entry->mode = VOIDmode;
5244 entry->offset = offset_base;
5246 /* We loop twice: first, we save 8-byte aligned registers in the
5247 higher addresses, that are known to be aligned. Then, we
5248 proceed to saving 32-bit registers that don't need 8-byte
5250 If this is an interrupt function, all registers that need saving
5251 need to be saved in full. moreover, we need to postpone saving
5252 target registers till we have saved some general purpose registers
5253 we can then use as scratch registers. */
5254 offset = offset_base;
5255 for (align = 1; align >= 0; align--)
5257 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5258 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5260 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5263 if (current_function_interrupt)
5265 if (TARGET_REGISTER_P (i))
5267 if (GENERAL_REGISTER_P (i))
/* Odd single FP register that is half of a live pair: it is saved
   together with its partner as one double, skip the single save.  */
5270 if (mode == SFmode && (i % 2) == 1
5271 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5272 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5279 /* If we're doing the aligned pass and this is not aligned,
5280 or we're doing the unaligned pass and this is aligned,
5282 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
/* In interrupt handlers, saved general registers double as extra
   temps for the deferred target-register saves below.  */
5286 if (current_function_interrupt
5287 && GENERAL_REGISTER_P (i)
5288 && tmpx < MAX_TEMPS)
5289 schedule->temps[tmpx++] = i;
5291 offset -= GET_MODE_SIZE (mode);
5294 entry->offset = offset;
/* Interrupt handlers: target registers were postponed above; schedule
   them now (aligned pass only), as DImode saves.  */
5297 if (align && current_function_interrupt)
5298 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5299 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5301 offset -= GET_MODE_SIZE (DImode);
5303 entry->mode = DImode;
5304 entry->offset = offset;
/* Trailing delimiter: VOIDmode, carrying the most negative offset.  */
5309 entry->mode = VOIDmode;
5310 entry->offset = offset;
5311 schedule->temps[tmpx] = -1;
/* sh_expand_prologue: emit RTL for the function prologue -- pretend-arg
   stack adjustment, SHcompact/SHmedia special setup, varargs register
   pushes, register saves (via sh5_schedule_saves on SH5, push_regs
   otherwise), PIC register setup, frame allocation and FP setup.
   NOTE(review): lossy line-numbered listing; gaps in the embedded
   numbering mean some original lines are missing.  Code kept verbatim.  */
5316 sh_expand_prologue (void)
5318 HARD_REG_SET live_regs_mask;
5321 int save_flags = target_flags;
5324 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5326 /* We have pretend args if we had an object sent partially in registers
5327 and partially on the stack, e.g. a large structure. */
5328 pretend_args = current_function_pretend_args_size;
5329 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5330 && (NPARM_REGS(SImode)
5331 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5333 output_stack_adjust (-pretend_args
5334 - current_function_args_info.stack_regs * 8,
5335 stack_pointer_rtx, 0, NULL);
5337 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5338 /* We're going to use the PIC register to load the address of the
5339 incoming-argument decoder and/or of the return trampoline from
5340 the GOT, so make sure the PIC register is preserved and
5342 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5344 if (TARGET_SHCOMPACT
5345 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5349 /* First, make all registers with incoming arguments that will
5350 be pushed onto the stack live, so that register renaming
5351 doesn't overwrite them. */
5352 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5353 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5354 >= NPARM_REGS (SImode) - reg)
5355 for (; reg < NPARM_REGS (SImode); reg++)
5356 emit_insn (gen_shcompact_preserve_incoming_args
5357 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5358 else if (CALL_COOKIE_INT_REG_GET
5359 (current_function_args_info.call_cookie, reg) == 1)
5360 emit_insn (gen_shcompact_preserve_incoming_args
5361 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
/* Pass the call cookie to the SHmedia argument decoder via mach/macl
   and r0 -- presumably the decoder's calling convention; the moved
   value on the macl line is lost in this listing (TODO confirm).  */
5363 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5365 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5366 GEN_INT (current_function_args_info.call_cookie));
5367 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5368 gen_rtx_REG (SImode, R0_REG));
5370 else if (TARGET_SHMEDIA)
5372 int tr = sh_media_register_for_return ();
5376 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5377 gen_rtx_REG (DImode, PR_MEDIA_REG));
5379 /* ??? We should suppress saving pr when we don't need it, but this
5380 is tricky because of builtin_return_address. */
5382 /* If this function only exits with sibcalls, this copy
5383 will be flagged as dead. */
5384 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5390 /* Emit the code for SETUP_VARARGS. */
5391 if (current_function_stdarg)
5393 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5395 /* Push arg regs as if they'd been provided by caller in stack. */
5396 for (i = 0; i < NPARM_REGS(SImode); i++)
5398 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5401 if (i >= (NPARM_REGS(SImode)
5402 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5406 RTX_FRAME_RELATED_P (insn) = 0;
5411 /* If we're supposed to switch stacks at function entry, do so now. */
5413 emit_insn (gen_sp_switch_1 ());
5415 d = calc_live_regs (&live_regs_mask);
5416 /* ??? Maybe we could save some switching if we can move a mode switch
5417 that already happens to be at the function start into the prologue. */
5418 if (target_flags != save_flags && ! current_function_interrupt)
5419 emit_insn (gen_toggle_sz ());
/* SH5 path (presumably guarded by a TARGET_SH5 test dropped from this
   listing -- TODO confirm): schedule saves, then emit each one.  */
5423 int offset_base, offset;
5425 int offset_in_r0 = -1;
5427 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5428 int total_size, save_size;
5429 save_schedule schedule;
5433 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5434 && ! current_function_interrupt)
5435 r0 = gen_rtx_REG (Pmode, R0_REG);
5437 /* D is the actual number of bytes that we need for saving registers,
5438 however, in initial_elimination_offset we have committed to using
5439 an additional TREGS_SPACE amount of bytes - in order to keep both
5440 addresses to arguments supplied by the caller and local variables
5441 valid, we must keep this gap. Place it between the incoming
5442 arguments and the actually saved registers in a bid to optimize
5443 locality of reference. */
5444 total_size = d + tregs_space;
5445 total_size += rounded_frame_size (total_size);
5446 save_size = total_size - rounded_frame_size (d);
5447 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5448 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5449 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5451 /* If adjusting the stack in a single step costs nothing extra, do so.
5452 I.e. either if a single addi is enough, or we need a movi anyway,
5453 and we don't exceed the maximum offset range (the test for the
5454 latter is conservative for simplicity). */
5456 && (CONST_OK_FOR_I10 (-total_size)
5457 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5458 && total_size <= 2044)))
5459 d_rounding = total_size - save_size;
5461 offset_base = d + d_rounding;
5463 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5466 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5467 tmp_pnt = schedule.temps;
5468 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5470 enum machine_mode mode = entry->mode;
5471 int reg = entry->reg;
5472 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5474 offset = entry->offset;
5476 reg_rtx = gen_rtx_REG (mode, reg);
5478 mem_rtx = gen_rtx_MEM (mode,
5479 gen_rtx_PLUS (Pmode,
5483 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
/* Fall back to r0-relative / pre-decrement addressing when the direct
   sp+offset address is not legitimate for this mode.  */
5491 if (HAVE_PRE_DECREMENT
5492 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5493 || mem_rtx == NULL_RTX
5494 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5496 pre_dec = gen_rtx_MEM (mode,
5497 gen_rtx_PRE_DEC (Pmode, r0));
5499 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5508 offset += GET_MODE_SIZE (mode);
5512 if (mem_rtx != NULL_RTX)
/* Track the offset currently materialized in r0 to avoid reloading
   it for consecutive saves.  */
5515 if (offset_in_r0 == -1)
5517 emit_move_insn (r0, GEN_INT (offset));
5518 offset_in_r0 = offset;
5520 else if (offset != offset_in_r0)
5525 GEN_INT (offset - offset_in_r0)));
5526 offset_in_r0 += offset - offset_in_r0;
5529 if (pre_dec != NULL_RTX)
5535 (Pmode, r0, stack_pointer_rtx));
5539 offset -= GET_MODE_SIZE (mode);
5540 offset_in_r0 -= GET_MODE_SIZE (mode);
5545 mem_rtx = gen_rtx_MEM (mode, r0);
5547 mem_rtx = gen_rtx_MEM (mode,
5548 gen_rtx_PLUS (Pmode,
5552 /* We must not use an r0-based address for target-branch
5553 registers or for special registers without pre-dec
5554 memory addresses, since we store their values in r0
5556 if (TARGET_REGISTER_P (reg)
5557 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5558 && mem_rtx != pre_dec))
/* Such registers are first copied into a scratch general register
   from schedule.temps, then that scratch is stored.  */
5562 if (TARGET_REGISTER_P (reg)
5563 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5564 && mem_rtx != pre_dec))
5566 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5568 emit_move_insn (tmp_reg, reg_rtx);
5570 if (REGNO (tmp_reg) == R0_REG)
5574 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5578 if (*++tmp_pnt <= 0)
5579 tmp_pnt = schedule.temps;
5586 /* Mark as interesting for dwarf cfi generator */
5587 insn = emit_move_insn (mem_rtx, reg_rtx);
5588 RTX_FRAME_RELATED_P (insn) = 1;
5590 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
/* Attach a REG_FRAME_RELATED_EXPR note describing the save as a plain
   sp+offset store, since the emitted form used r0 addressing.  */
5592 rtx reg_rtx = gen_rtx_REG (mode, reg);
5594 rtx mem_rtx = gen_rtx_MEM (mode,
5595 gen_rtx_PLUS (Pmode,
5599 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5600 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5602 REG_NOTES (insn) = note_rtx;
5607 if (entry->offset != d_rounding)
5611 push_regs (&live_regs_mask, current_function_interrupt);
5613 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5615 rtx insn = get_last_insn ();
5616 rtx last = emit_insn (gen_GOTaddr2picreg ());
5618 /* Mark these insns as possibly dead. Sometimes, flow2 may
5619 delete all uses of the PIC register. In this case, let it
5620 delete the initialization too. */
5623 insn = NEXT_INSN (insn);
5625 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5629 while (insn != last);
5632 if (SHMEDIA_REGS_STACK_ADJUST ())
5634 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5635 function_symbol (TARGET_FPU_ANY
5636 ? "__GCC_push_shmedia_regs"
5637 : "__GCC_push_shmedia_regs_nofpu"));
5638 /* This must NOT go through the PLT, otherwise mach and macl
5639 may be clobbered. */
5640 emit_insn (gen_shmedia_save_restore_regs_compact
5641 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5644 if (target_flags != save_flags && ! current_function_interrupt)
5646 rtx insn = emit_insn (gen_toggle_sz ());
5648 /* If we're lucky, a mode switch in the function body will
5649 overwrite fpscr, turning this insn dead. Tell flow this
5650 insn is ok to delete. */
5651 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5656 target_flags = save_flags;
5658 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5659 stack_pointer_rtx, 0, NULL);
5661 if (frame_pointer_needed)
5662 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5664 if (TARGET_SHCOMPACT
5665 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5667 /* This must NOT go through the PLT, otherwise mach and macl
5668 may be clobbered. */
5669 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5670 function_symbol ("__GCC_shcompact_incoming_args"));
5671 emit_insn (gen_shcompact_incoming_args ());
/* sh_expand_epilogue: emit RTL for the function epilogue -- frame
   deallocation, register restores (scheduled on SH5, popped in reverse
   order otherwise), pretend-arg adjustment, EH return adjustment and
   stack switch-back.  SIBCALL_P selects sibcall semantics (E = -1).
   NOTE(review): lossy line-numbered listing; gaps in the embedded
   numbering mean some original lines are missing.  Code kept verbatim.  */
5676 sh_expand_epilogue (bool sibcall_p)
5678 HARD_REG_SET live_regs_mask;
5682 int save_flags = target_flags;
5683 int frame_size, save_size;
5684 int fpscr_deferred = 0;
5685 int e = sibcall_p ? -1 : 1;
5687 d = calc_live_regs (&live_regs_mask);
5690 frame_size = rounded_frame_size (d);
5694 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5696 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5697 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5698 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5700 total_size = d + tregs_space;
5701 total_size += rounded_frame_size (total_size);
5702 save_size = total_size - frame_size;
5704 /* If adjusting the stack in a single step costs nothing extra, do so.
5705 I.e. either if a single addi is enough, or we need a movi anyway,
5706 and we don't exceed the maximum offset range (the test for the
5707 latter is conservative for simplicity). */
5709 && ! frame_pointer_needed
5710 && (CONST_OK_FOR_I10 (total_size)
5711 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5712 && total_size <= 2044)))
5713 d_rounding = frame_size;
5715 frame_size -= d_rounding;
5718 if (frame_pointer_needed)
5720 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5722 /* We must avoid moving the stack pointer adjustment past code
5723 which reads from the local frame, else an interrupt could
5724 occur after the SP adjustment and clobber data in the local
5726 emit_insn (gen_blockage ());
5727 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5729 else if (frame_size)
5731 /* We must avoid moving the stack pointer adjustment past code
5732 which reads from the local frame, else an interrupt could
5733 occur after the SP adjustment and clobber data in the local
5735 emit_insn (gen_blockage ());
5736 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5739 if (SHMEDIA_REGS_STACK_ADJUST ())
5741 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5742 function_symbol (TARGET_FPU_ANY
5743 ? "__GCC_pop_shmedia_regs"
5744 : "__GCC_pop_shmedia_regs_nofpu"));
5745 /* This must NOT go through the PLT, otherwise mach and macl
5746 may be clobbered. */
5747 emit_insn (gen_shmedia_save_restore_regs_compact
5748 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5751 /* Pop all the registers. */
5753 if (target_flags != save_flags && ! current_function_interrupt)
5754 emit_insn (gen_toggle_sz ());
/* SH5 restore path: walk the save schedule backwards (entry--) and
   reload each register, mirroring the prologue's save loop.  */
5757 int offset_base, offset;
5758 int offset_in_r0 = -1;
5760 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5761 save_schedule schedule;
5765 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5766 offset_base = -entry[1].offset + d_rounding;
5767 tmp_pnt = schedule.temps;
5768 for (; entry->mode != VOIDmode; entry--)
5770 enum machine_mode mode = entry->mode;
5771 int reg = entry->reg;
5772 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5774 offset = offset_base + entry->offset;
5775 reg_rtx = gen_rtx_REG (mode, reg);
5777 mem_rtx = gen_rtx_MEM (mode,
5778 gen_rtx_PLUS (Pmode,
5782 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5788 if (HAVE_POST_INCREMENT
5789 && (offset == offset_in_r0
5790 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5791 && mem_rtx == NULL_RTX)
5792 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5794 post_inc = gen_rtx_MEM (mode,
5795 gen_rtx_POST_INC (Pmode, r0));
5797 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5800 post_inc = NULL_RTX;
5809 if (mem_rtx != NULL_RTX)
/* Keep r0 tracking the current restore offset, as in the prologue.  */
5812 if (offset_in_r0 == -1)
5814 emit_move_insn (r0, GEN_INT (offset));
5815 offset_in_r0 = offset;
5817 else if (offset != offset_in_r0)
5822 GEN_INT (offset - offset_in_r0)));
5823 offset_in_r0 += offset - offset_in_r0;
5826 if (post_inc != NULL_RTX)
5832 (Pmode, r0, stack_pointer_rtx));
5838 offset_in_r0 += GET_MODE_SIZE (mode);
5841 mem_rtx = gen_rtx_MEM (mode, r0);
5843 mem_rtx = gen_rtx_MEM (mode,
5844 gen_rtx_PLUS (Pmode,
5848 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5849 && mem_rtx != post_inc)
5853 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5854 && mem_rtx != post_inc)
5856 insn = emit_move_insn (r0, mem_rtx);
5859 else if (TARGET_REGISTER_P (reg))
5861 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5863 /* Give the scheduler a bit of freedom by using up to
5864 MAX_TEMPS registers in a round-robin fashion. */
5865 insn = emit_move_insn (tmp_reg, mem_rtx);
5868 tmp_pnt = schedule.temps;
5871 insn = emit_move_insn (reg_rtx, mem_rtx);
5872 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5873 /* This is dead, unless we return with a sibcall. */
5874 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5879 if (entry->offset + offset_base != d + d_rounding)
5882 else /* ! TARGET_SH5 */
5885 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5887 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5889 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
/* NOTE(review): the '(R)' glyph below looks like mojibake for
   "&reg" (i.e. &reg_class_contents[DF_REGS]) -- verify against the
   upstream sh.c before relying on this line.  */
5891 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5892 && hard_regs_intersect_p (&live_regs_mask,
5893 ®_class_contents[DF_REGS]))
5895 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5897 if (j == FIRST_FP_REG && fpscr_deferred)
5902 if (target_flags != save_flags && ! current_function_interrupt)
5903 emit_insn (gen_toggle_sz ());
5904 target_flags = save_flags;
5906 output_stack_adjust (current_function_pretend_args_size
5907 + save_size + d_rounding
5908 + current_function_args_info.stack_regs * 8,
5909 stack_pointer_rtx, e, NULL);
5911 if (current_function_calls_eh_return)
5912 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5913 EH_RETURN_STACKADJ_RTX));
5915 /* Switch back to the normal stack if necessary. */
5917 emit_insn (gen_sp_switch_2 ());
5919 /* Tell flow the insn that pops PR isn't dead. */
5920 /* PR_REG will never be live in SHmedia mode, and we don't need to
5921 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5922 by the return pattern. */
5923 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5924 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5927 static int sh_need_epilogue_known = 0;
5930 sh_need_epilogue (void)
5932 if (! sh_need_epilogue_known)
5937 sh_expand_epilogue (0);
5938 epilogue = get_insns ();
5940 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5942 return sh_need_epilogue_known > 0;
/* sh_set_return_address: overwrite the current function's saved return
   address with RA, either directly in the return register (when PR is
   not live) or in PR's stack save slot located via the save schedule.
   NOTE(review): lossy line-numbered listing; gaps in the embedded
   numbering mean some original lines are missing.  Code kept verbatim.  */
5945 /* Emit code to change the current function's return address to RA.
5946 TEMP is available as a scratch register, if needed. */
5949 sh_set_return_address (rtx ra, rtx tmp)
5951 HARD_REG_SET live_regs_mask;
5953 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5956 d = calc_live_regs (&live_regs_mask);
5958 /* If pr_reg isn't life, we can set it (or the register given in
5959 sh_media_register_for_return) directly. */
5960 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5966 int rr_regno = sh_media_register_for_return ();
5971 rr = gen_rtx_REG (DImode, rr_regno);
5974 rr = gen_rtx_REG (SImode, pr_reg);
5976 emit_insn (GEN_MOV (rr, ra));
5977 /* Tell flow the register for return isn't dead. */
5978 emit_insn (gen_rtx_USE (VOIDmode, rr));
/* Otherwise PR was saved on the stack: find its slot.  On SH5, locate
   the entry for pr_reg in the save schedule.  */
5985 save_schedule schedule;
5988 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5989 offset = entry[1].offset;
5990 for (; entry->mode != VOIDmode; entry--)
5991 if (entry->reg == pr_reg)
5994 /* We can't find pr register. */
5998 offset = entry->offset - offset;
5999 pr_offset = (rounded_frame_size (d) + offset
6000 + SHMEDIA_REGS_STACK_ADJUST ());
6003 pr_offset = rounded_frame_size (d);
/* Store RA into the frame slot: tmp = fp + pr_offset; *tmp = ra.  */
6005 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6006 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6008 tmp = gen_rtx_MEM (Pmode, tmp);
6009 emit_insn (GEN_MOV (tmp, ra));
6012 /* Clear variables at function end. */
6015 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6016 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6018 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6019 sh_need_epilogue_known = 0;
6020 sp_switch = NULL_RTX;
/* sh_builtin_saveregs: expand __builtin_saveregs -- allocate a register
   buffer on the stack, save the unnamed integer and floating argument
   registers into it, and return the buffer's address.
   NOTE(review): lossy line-numbered listing; gaps in the embedded
   numbering mean some original lines are missing.  Code kept verbatim.  */
6024 sh_builtin_saveregs (void)
6026 /* First unnamed integer register. */
6027 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6028 /* Number of integer registers we need to save. */
6029 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6030 /* First unnamed SFmode float reg */
6031 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6032 /* Number of SFmode float regs to save. */
6033 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6036 HOST_WIDE_INT alias_set;
/* SHmedia/SH5 path: record the registers to be pushed in the call
   cookie and in pretend_args_size rather than storing them here.  */
6042 int pushregs = n_intregs;
6044 while (pushregs < NPARM_REGS (SImode) - 1
6045 && (CALL_COOKIE_INT_REG_GET
6046 (current_function_args_info.call_cookie,
6047 NPARM_REGS (SImode) - pushregs)
6050 current_function_args_info.call_cookie
6051 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6056 if (pushregs == NPARM_REGS (SImode))
6057 current_function_args_info.call_cookie
6058 |= (CALL_COOKIE_INT_REG (0, 1)
6059 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6061 current_function_args_info.call_cookie
6062 |= CALL_COOKIE_STACKSEQ (pushregs);
6064 current_function_pretend_args_size += 8 * n_intregs;
6066 if (TARGET_SHCOMPACT)
6070 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6072 error ("__builtin_saveregs not supported by this subtarget");
6079 /* Allocate block of memory for the regs. */
6080 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6081 Or can assign_stack_local accept a 0 SIZE argument? */
6082 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6085 regbuf = gen_rtx_MEM (BLKmode,
6086 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
/* Odd number of float regs: over-allocate one word and odd-align the
   buffer so double-word stores below land on aligned addresses.  */
6087 else if (n_floatregs & 1)
6091 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6092 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6093 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6094 regbuf = change_address (regbuf, BLKmode, addr);
6097 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6098 alias_set = get_varargs_alias_set ();
6099 set_mem_alias_set (regbuf, alias_set);
6102 This is optimized to only save the regs that are necessary. Explicitly
6103 named args need not be saved. */
6105 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6106 adjust_address (regbuf, BLKmode,
6107 n_floatregs * UNITS_PER_WORD),
6111 /* Return the address of the regbuf. */
6112 return XEXP (regbuf, 0);
6115 This is optimized to only save the regs that are necessary. Explicitly
6116 named args need not be saved.
6117 We explicitly build a pointer to the buffer because it halves the insn
6118 count when not optimizing (otherwise the pointer is built for each reg
6120 We emit the moves in reverse order so that we can use predecrement. */
6122 fpregs = gen_reg_rtx (Pmode);
6123 emit_move_insn (fpregs, XEXP (regbuf, 0));
6124 emit_insn (gen_addsi3 (fpregs, fpregs,
6125 GEN_INT (n_floatregs * UNITS_PER_WORD)));
/* First variant (presumably TARGET_SH4 -- guard line missing from this
   listing, TODO confirm): save pairs as DFmode, then a possible odd
   leading SFmode register.  */
6129 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6131 emit_insn (gen_addsi3 (fpregs, fpregs,
6132 GEN_INT (-2 * UNITS_PER_WORD)));
6133 mem = gen_rtx_MEM (DFmode, fpregs);
6134 set_mem_alias_set (mem, alias_set);
6135 emit_move_insn (mem,
6136 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6138 regno = first_floatreg;
6141 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6142 mem = gen_rtx_MEM (SFmode, fpregs);
6143 set_mem_alias_set (mem, alias_set);
6144 emit_move_insn (mem,
6145 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6146 - (TARGET_LITTLE_ENDIAN != 0)));
/* Fallback variant: save each SFmode register individually.  */
6150 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6154 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6155 mem = gen_rtx_MEM (SFmode, fpregs);
6156 set_mem_alias_set (mem, alias_set);
6157 emit_move_insn (mem,
6158 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6161 /* Return the address of the regbuf. */
6162 return XEXP (regbuf, 0);
6165 /* Define the `__builtin_va_list' type for the ABI. */
6168 sh_build_builtin_va_list (void)
6170 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6173 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6174 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6175 return ptr_type_node;
6177 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6179 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6181 f_next_o_limit = build_decl (FIELD_DECL,
6182 get_identifier ("__va_next_o_limit"),
6184 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6186 f_next_fp_limit = build_decl (FIELD_DECL,
6187 get_identifier ("__va_next_fp_limit"),
6189 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6192 DECL_FIELD_CONTEXT (f_next_o) = record;
6193 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6194 DECL_FIELD_CONTEXT (f_next_fp) = record;
6195 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6196 DECL_FIELD_CONTEXT (f_next_stack) = record;
6198 TYPE_FIELDS (record) = f_next_o;
6199 TREE_CHAIN (f_next_o) = f_next_o_limit;
6200 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6201 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6202 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6204 layout_type (record);
6209 /* Implement `va_start' for varargs and stdarg. */
/* Initializes the five-pointer va_list built by
   sh_build_builtin_va_list.  Configurations that use a plain pointer
   va_list delegate to std_expand_builtin_va_start instead.  */
6212 sh_va_start (tree valist, rtx nextarg)
6214 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6215 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6221 expand_builtin_saveregs ();
6222 std_expand_builtin_va_start (valist, nextarg);
/* Targets without the record-type va_list take the standard path.  */
6226 if ((! TARGET_SH2E && ! TARGET_SH4)
6227 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6229 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain in the same order sh_build_builtin_va_list
   created it.  */
6233 f_next_o = TYPE_FIELDS (va_list_type_node);
6234 f_next_o_limit = TREE_CHAIN (f_next_o)
6235 f_next_fp = TREE_CHAIN (f_next_o_limit);
6236 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6237 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6239 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6241 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6242 valist, f_next_o_limit, NULL_TREE);
6243 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6245 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6246 valist, f_next_fp_limit, NULL_TREE);
6247 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6248 valist, f_next_stack, NULL_TREE);
6250 /* Call __builtin_saveregs. */
6251 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6252 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6253 TREE_SIDE_EFFECTS (t) = 1;
6254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_fp_limit = start of regbuf + size of the saved FP args.  */
6256 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6261 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6262 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6263 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6264 TREE_SIDE_EFFECTS (t) = 1;
6265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* The integer-register area starts where the FP area ends.  */
6267 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6268 TREE_SIDE_EFFECTS (t) = 1;
6269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6271 nint = current_function_args_info.arg_count[SH_ARG_INT];
6276 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6277 build_int_2 (UNITS_PER_WORD * nint, 0)));
6278 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6279 TREE_SIDE_EFFECTS (t) = 1;
6280 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* Remaining anonymous args live on the stack, starting at NEXTARG.  */
6282 u = make_tree (ptr_type_node, nextarg);
6283 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6284 TREE_SIDE_EFFECTS (t) = 1;
6285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6288 /* Implement `va_arg'. */
/* Returns an rtx holding the address of the next argument of TYPE.
   On SH2E/SH4 (non-Renesas ABI) floating args are fetched from the
   FP register-save area, integer args from the integer area, and
   overflow args from the stack, using the five-field va_list.
   NOTE(review): this block appears truncated in this extraction
   (braces/declarations missing between the numbered lines) -- verify
   against upstream gcc/config/sh/sh.c before modifying.  */
6291 sh_va_arg (tree valist, tree type)
6293 HOST_WIDE_INT size, rsize;
6294 tree tmp, pptr_type_node;
6296 rtx result_ptr, result = NULL_RTX;
6297 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6300 size = int_size_in_bytes (type);
/* Round the size up to a multiple of UNITS_PER_WORD.  */
6301 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6302 pptr_type_node = build_pointer_type (ptr_type_node);
/* Pass-by-reference arguments are fetched through a pointer.  */
6305 type = build_pointer_type (type);
6307 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6308 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6310 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6311 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6315 f_next_o = TYPE_FIELDS (va_list_type_node);
6316 f_next_o_limit = TREE_CHAIN (f_next_o);
6317 f_next_fp = TREE_CHAIN (f_next_o_limit);
6318 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6319 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6321 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6323 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6324 valist, f_next_o_limit, NULL_TREE);
6325 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6326 valist, f_next_fp, NULL_TREE);
6327 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6328 valist, f_next_fp_limit, NULL_TREE);
6329 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6330 valist, f_next_stack, NULL_TREE);
6332 /* Structures with a single member with a distinct mode are passed
6333 like their member. This is relevant if the latter has a REAL_TYPE
6334 or COMPLEX_TYPE type. */
6335 if (TREE_CODE (type) == RECORD_TYPE
6336 && TYPE_FIELDS (type)
6337 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6338 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6339 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6340 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6341 type = TREE_TYPE (TYPE_FIELDS (type));
/* Decide whether this argument is fetched from the FP save area.  */
6344 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6345 || (TREE_CODE (type) == COMPLEX_TYPE
6346 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6351 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6354 addr_rtx = gen_reg_rtx (Pmode);
6355 lab_false = gen_label_rtx ();
6356 lab_over = gen_label_rtx ();
/* Redirect VALIST through ADDR_RTX so the std_expand_builtin_va_arg
   call below advances whichever pointer was selected.  */
6358 tmp = make_tree (pptr_type_node, addr_rtx);
6359 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6364 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6365 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
/* If next_fp has reached next_fp_limit, fall through to the stack.  */
6367 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6369 expand_expr (next_fp_limit, NULL_RTX,
6370 Pmode, EXPAND_NORMAL),
6371 GE, const1_rtx, Pmode, 1, lab_false);
/* Align next_fp for doubleword-aligned values.  */
6373 if (TYPE_ALIGN (type) > BITS_PER_WORD
6374 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6375 && (n_floatregs & 1)))
6377 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6378 build_int_2 (UNITS_PER_WORD, 0));
6379 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6380 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6381 TREE_SIDE_EFFECTS (tmp) = 1;
6382 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6385 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6386 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6388 emit_move_insn (addr_rtx, r);
6390 #ifdef FUNCTION_ARG_SCmode_WART
/* Little-endian SH4 stores the two SCmode halves swapped; fetch each
   half separately and reassemble them in a stack temporary.  */
6391 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6393 rtx addr, real, imag, result_value, slot;
6394 tree subtype = TREE_TYPE (type);
6396 addr = std_expand_builtin_va_arg (valist, subtype);
6397 #ifdef POINTERS_EXTEND_UNSIGNED
6398 if (GET_MODE (addr) != Pmode)
6399 addr = convert_memory_address (Pmode, addr);
6401 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6402 set_mem_alias_set (imag, get_varargs_alias_set ());
6404 addr = std_expand_builtin_va_arg (valist, subtype);
6405 #ifdef POINTERS_EXTEND_UNSIGNED
6406 if (GET_MODE (addr) != Pmode)
6407 addr = convert_memory_address (Pmode, addr);
6409 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6410 set_mem_alias_set (real, get_varargs_alias_set ());
6412 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6413 /* ??? this interface is stupid - why require a pointer? */
6414 result = gen_reg_rtx (Pmode);
6415 slot = assign_stack_temp (SCmode, 8, 0);
6416 emit_move_insn (slot, result_value);
6417 emit_move_insn (result, XEXP (slot, 0));
6419 #endif /* FUNCTION_ARG_SCmode_WART */
6421 emit_jump_insn (gen_jump (lab_over));
/* FP area exhausted: take the argument from the stack.  */
6423 emit_label (lab_false);
6425 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6426 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6428 emit_move_insn (addr_rtx, r);
/* Integer case: check whether the rounded argument still fits
   before next_o_limit.  */
6432 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6433 build_int_2 (rsize, 0));
6435 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6437 expand_expr (next_o_limit, NULL_RTX,
6438 Pmode, EXPAND_NORMAL),
6439 GT, const1_rtx, Pmode, 1, lab_false);
6441 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6442 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6444 emit_move_insn (addr_rtx, r);
6446 emit_jump_insn (gen_jump (lab_over));
6448 emit_label (lab_false);
/* Once an argument overflows the register area, subsequent ones go
   on the stack too.  */
6450 if (size > 4 && ! TARGET_SH4)
6452 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6453 TREE_SIDE_EFFECTS (tmp) = 1;
6454 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6457 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6458 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6460 emit_move_insn (addr_rtx, r);
6464 emit_label (lab_over);
6467 /* ??? In va-sh.h, there had been code to make values larger than
6468 size 8 indirect. This does not match the FUNCTION_ARG macros. */
/* Let the standard expander do the actual fetch/advance through the
   pointer selected above.  */
6470 result_ptr = std_expand_builtin_va_arg (valist, type);
6473 emit_move_insn (result, result_ptr);
6474 emit_label (lab_over);
6477 result = result_ptr;
/* For pass-by-reference, dereference the fetched pointer.  */
6481 #ifdef POINTERS_EXTEND_UNSIGNED
6482 if (GET_MODE (addr) != Pmode)
6483 addr = convert_memory_address (Pmode, result);
6485 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6486 set_mem_alias_set (result, get_varargs_alias_set ());
6488 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6489 argument to the varargs alias set. */
/* Target hook: promote small arguments per prototype, except under the
   Renesas ABI (see sh_attr_renesas_p).  */
6494 sh_promote_prototypes (tree type)
6500 return ! sh_attr_renesas_p (type);
6503 /* Define where to put the arguments to a function.
6504 Value is zero to push the argument on the stack,
6505 or a hard register in which to store the argument.
6507 MODE is the argument's machine mode.
6508 TYPE is the data type of the argument (as a tree).
6509 This is null for libcalls where that information may
6511 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6512 the preceding args and about the function being called.
6513 NAMED is nonzero if this argument is a named parameter
6514 (otherwise it is an extra parameter matching an ellipsis).
6516 On SH the first args are normally in registers
6517 and the rest are pushed. Any arg that starts within the first
6518 NPARM_REGS words is at least partially passed in a register unless
6519 its data type forbids. */
/* NOTE(review): block appears truncated in this extraction; some
   conditions/braces are missing between the numbered lines.  */
6523 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6524 tree type, int named)
/* VOIDmode marks the end of the argument list; the value encodes
   whether the Renesas ABI is in use.  */
6526 if (! TARGET_SH5 && mode == VOIDmode)
6527 return GEN_INT (ca->renesas_abi ? 1 : 0);
6530 && PASS_IN_REG_P (*ca, mode, type)
6531 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
/* Little-endian SH4 SCmode: the real/imag halves must be swapped, so
   return a PARALLEL of two SFmode registers.  */
6535 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6536 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6538 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6539 gen_rtx_REG (SFmode,
6541 + (ROUND_REG (*ca, mode) ^ 1)),
6543 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6544 gen_rtx_REG (SFmode,
6546 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6548 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6551 /* If the alignment of a DF value causes an SF register to be
6552 skipped, we will use that skipped register for the next SF
6554 if ((TARGET_HITACHI || ca->renesas_abi)
6555 && ca->free_single_fp_reg
6557 return gen_rtx_REG (mode, ca->free_single_fp_reg);
/* The XOR swaps SFmode register pairing on little-endian SH4.  */
6559 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6560 ^ (mode == SFmode && TARGET_SH4
6561 && TARGET_LITTLE_ENDIAN != 0
6562 && ! TARGET_HITACHI && ! ca->renesas_abi);
6563 return gen_rtx_REG (mode, regno);
/* SH5/SHcompact path: VOIDmode at the end of the list carries the call
   cookie describing the stack/register layout.  */
6569 if (mode == VOIDmode && TARGET_SHCOMPACT)
6570 return GEN_INT (ca->call_cookie);
6572 /* The following test assumes unnamed arguments are promoted to
6574 if (mode == SFmode && ca->free_single_fp_reg)
6575 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6577 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6578 && (named || ! ca->prototype_p)
6579 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6581 if (! ca->prototype_p && TARGET_SHMEDIA)
6582 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6584 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6586 + ca->arg_count[(int) SH_ARG_FLOAT]);
6589 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6590 && (! TARGET_SHCOMPACT
6591 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6592 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6595 return gen_rtx_REG (mode, (FIRST_PARM_REG
6596 + ca->arg_count[(int) SH_ARG_INT]));
6605 /* Update the data in CUM to advance over an argument
6606 of mode MODE and data type TYPE.
6607 (TYPE is null for libcalls where that information may not be
/* NOTE(review): block appears truncated in this extraction; verify
   against upstream gcc/config/sh/sh.c before modifying.  */
6611 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6612 tree type, int named)
/* SH5: account in 8-byte "dwords"; by-reference args are measured by
   the pointed-to type.  */
6616 else if (TARGET_SH5)
6618 tree type2 = (ca->byref && type
6621 enum machine_mode mode2 = (ca->byref && type
6624 int dwords = ((ca->byref
6627 ? int_size_in_bytes (type2)
6628 : GET_MODE_SIZE (mode2)) + 7) / 8;
6629 int numregs = MIN (dwords, NPARM_REGS (SImode)
6630 - ca->arg_count[(int) SH_ARG_INT]);
6634 ca->arg_count[(int) SH_ARG_INT] += numregs;
/* SHcompact encodes which args live on the stack in the call cookie
   so the trampoline can relocate them.  */
6635 if (TARGET_SHCOMPACT
6636 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6639 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6641 /* N.B. We want this also for outgoing. */
6642 ca->stack_regs += numregs;
6647 ca->stack_regs += numregs;
6648 ca->byref_regs += numregs;
6652 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6656 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
/* Argument spills past the register file: record a stack sequence.  */
6659 else if (dwords > numregs)
6661 int pushregs = numregs;
6663 if (TARGET_SHCOMPACT)
6664 ca->stack_regs += numregs;
6665 while (pushregs < NPARM_REGS (SImode) - 1
6666 && (CALL_COOKIE_INT_REG_GET
6668 NPARM_REGS (SImode) - pushregs)
6672 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6676 if (numregs == NPARM_REGS (SImode))
6678 |= CALL_COOKIE_INT_REG (0, 1)
6679 | CALL_COOKIE_STACKSEQ (numregs - 1);
6682 |= CALL_COOKIE_STACKSEQ (numregs);
6685 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6686 && (named || ! ca->prototype_p))
6688 if (mode2 == SFmode && ca->free_single_fp_reg)
6689 ca->free_single_fp_reg = 0;
6690 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6691 < NPARM_REGS (SFmode))
6694 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6696 - ca->arg_count[(int) SH_ARG_FLOAT]);
6698 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6700 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6702 if (ca->outgoing && numregs > 0)
6706 |= (CALL_COOKIE_INT_REG
6707 (ca->arg_count[(int) SH_ARG_INT]
6708 - numregs + ((numfpregs - 2) / 2),
6709 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6712 while (numfpregs -= 2);
6714 else if (mode2 == SFmode && (named)
6715 && (ca->arg_count[(int) SH_ARG_FLOAT]
6716 < NPARM_REGS (SFmode)))
6717 ca->free_single_fp_reg
6718 = FIRST_FP_PARM_REG - numfpregs
6719 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
/* Non-SH5 Renesas/Hitachi ABI with double-precision FPU.  */
6725 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6727 /* Note that we've used the skipped register. */
6728 if (mode == SFmode && ca->free_single_fp_reg)
6730 ca->free_single_fp_reg = 0;
6733 /* When we have a DF after an SF, there's an SF register that gets
6734 skipped in order to align the DF value. We note this skipped
6735 register, because the next SF value will use it, and not the
6736 SF that follows the DF. */
6738 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6740 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6741 + BASE_ARG_REG (mode));
/* Default: bump the per-class register count by the rounded size.  */
6745 if (! (TARGET_SH4 || ca->renesas_abi)
6746 || PASS_IN_REG_P (*ca, mode, type))
6747 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6748 = (ROUND_REG (*ca, mode)
6750 ? ROUND_ADVANCE (int_size_in_bytes (type))
6751 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6754 /* The Renesas calling convention doesn't quite fit into this scheme since
6755 the address is passed like an invisible argument, but one that is always
6756 passed in memory. */
6758 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6760 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
/* GCC ABI: aggregate return address is passed in r2.  */
6762 return gen_rtx_REG (Pmode, 2);
6765 /* Worker function for TARGET_RETURN_IN_MEMORY. */
/* Anything wider than 8 bytes (and, for the Renesas ABI, any record)
   is returned in memory.  */
6768 sh_return_in_memory (tree type, tree fndecl)
6772 if (TYPE_MODE (type) == BLKmode)
6773 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6775 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6779 return (TYPE_MODE (type) == BLKmode
6780 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6781 && TREE_CODE (type) == RECORD_TYPE));
6785 /* We actually emit the code in sh_expand_prologue. We used to use
6786 a static variable to flag that we need to emit this code, but that
6787 doesn't work when inlining, when functions are deferred and then emitted
6788 later. Fortunately, we already have two flags that are part of struct
6789 function that tell if a function uses varargs or stdarg. */
/* Computes the pretend-args size so anonymous register args get spilled
   by the prologue.  */
6791 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6792 enum machine_mode mode,
6794 int *pretend_arg_size,
6795 int second_time ATTRIBUTE_UNUSED)
6797 if (! current_function_stdarg)
6799 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6801 int named_parm_regs, anon_parm_regs;
6803 named_parm_regs = (ROUND_REG (*ca, mode)
6805 ? ROUND_ADVANCE (int_size_in_bytes (type))
6806 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6807 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6808 if (anon_parm_regs > 0)
6809 *pretend_arg_size = anon_parm_regs * 4;
/* Target hook: whether named/unnamed args are distinguished strictly.  */
6814 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
/* Target hook: treat outgoing unnamed args as named except under the
   Renesas/Hitachi ABIs and SH5.  */
6820 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6822 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6826 /* Define the offset between two registers, one to be eliminated, and
6827 the other its replacement, at the start of a routine. */
/* Used by the ELIMINABLE_REGS machinery: computes saved-register and
   local-frame space from calc_live_regs, then returns the offset for
   the requested FROM/TO pair.  */
6830 initial_elimination_offset (int from, int to)
6833 int regs_saved_rounding = 0;
6834 int total_saved_regs_space;
6835 int total_auto_space;
6836 int save_flags = target_flags;
6838 HARD_REG_SET live_regs_mask;
6840 shmedia_space_reserved_for_target_registers = false;
6841 regs_saved = calc_live_regs (&live_regs_mask);
6842 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6844 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6846 shmedia_space_reserved_for_target_registers = true;
6847 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
/* SH5 keeps the register-save area aligned to STACK_BOUNDARY.  */
6850 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6851 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6852 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6854 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
/* calc_live_regs may have changed target_flags; remember its value and
   restore the caller's.  */
6855 copy_flags = target_flags;
6856 target_flags = save_flags;
6858 total_saved_regs_space = regs_saved + regs_saved_rounding;
6860 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6861 return total_saved_regs_space + total_auto_space
6862 + current_function_args_info.byref_regs * 8;
6864 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6865 return total_saved_regs_space + total_auto_space
6866 + current_function_args_info.byref_regs * 8;
6868 /* Initial gap between fp and sp is 0. */
6869 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6872 if (from == RETURN_ADDRESS_POINTER_REGNUM
6873 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
/* SH5: locate PR's slot in the computed save schedule.  */
6877 int n = total_saved_regs_space;
6878 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6879 save_schedule schedule;
6882 n += total_auto_space;
6884 /* If it wasn't saved, there's not much we can do. */
6885 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
/* Temporarily restore the flags calc_live_regs ran under, since the
   save schedule depends on them.  */
6888 target_flags = copy_flags;
6890 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6891 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6892 if (entry->reg == pr_reg)
6894 target_flags = save_flags;
6895 return entry->offset;
6900 return total_auto_space;
6906 /* Handle machine specific pragmas to be semi-compatible with Renesas
/* #pragma interrupt: mark following functions as interrupt handlers.  */
6910 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6912 pragma_interrupt = 1;
/* #pragma trapa: like #pragma interrupt, but exit via trapa.  */
6916 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6918 pragma_interrupt = pragma_trapa = 1;
/* #pragma nosave_low_regs: interrupt handlers need not save r0..r7.  */
6922 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6924 pragma_nosave_low_regs = 1;
6927 /* Generate 'handle_interrupt' attribute for decls */
/* Target hook: when #pragma interrupt is active, tack an
   "interrupt_handler" attribute onto each function declaration.  */
6930 sh_insert_attributes (tree node, tree *attributes)
6932 if (! pragma_interrupt
6933 || TREE_CODE (node) != FUNCTION_DECL)
6936 /* We are only interested in declarations ('d' tree-code class). */
6937 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6940 /* Add a 'handle_interrupt' attribute. */
6941 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6946 /* Supported attributes:
6948 interrupt_handler -- specifies this function is an interrupt handler.
6950 sp_switch -- specifies an alternate stack for an interrupt handler
6953 trap_exit -- use a trapa to exit an interrupt function instead of
6956 renesas -- use Renesas calling/layout conventions (functions and
/* Attribute table consumed by the generic attribute machinery; each
   entry names its handler function below.  */
6961 const struct attribute_spec sh_attribute_table[] =
6963 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6964 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6965 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6966 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6967 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6968 { NULL, 0, 0, false, false, false, NULL }
6971 /* Handle an "interrupt_handler" attribute; arguments as in
6972 struct attribute_spec.handler. */
/* Rejects the attribute on non-functions and on SHcompact (-m5-compact),
   where interrupt handlers are not supported.  */
6974 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6975 tree args ATTRIBUTE_UNUSED,
6976 int flags ATTRIBUTE_UNUSED,
6979 if (TREE_CODE (*node) != FUNCTION_DECL)
6981 warning ("`%s' attribute only applies to functions",
6982 IDENTIFIER_POINTER (name));
6983 *no_add_attrs = true;
6985 else if (TARGET_SHCOMPACT)
6987 error ("attribute interrupt_handler is not compatible with -m5-compact");
6988 *no_add_attrs = true;
6994 /* Handle an "sp_switch" attribute; arguments as in
6995 struct attribute_spec.handler. */
/* Valid only on interrupt functions; its single argument must be a
   string naming the symbol of the alternate stack.  */
6997 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7000 if (TREE_CODE (*node) != FUNCTION_DECL)
7002 warning ("`%s' attribute only applies to functions",
7003 IDENTIFIER_POINTER (name));
7004 *no_add_attrs = true;
7006 else if (!pragma_interrupt)
7008 /* The sp_switch attribute only has meaning for interrupt functions. */
7009 warning ("`%s' attribute only applies to interrupt functions",
7010 IDENTIFIER_POINTER (name));
7011 *no_add_attrs = true;
7013 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7015 /* The argument must be a constant string. */
7016 warning ("`%s' attribute argument not a string constant",
7017 IDENTIFIER_POINTER (name));
7018 *no_add_attrs = true;
/* Record the alternate stack symbol for prologue/epilogue emission.  */
7022 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7023 TREE_STRING_POINTER (TREE_VALUE (args)));
7029 /* Handle a "trap_exit" attribute; arguments as in
7030 struct attribute_spec.handler. */
/* Valid only on interrupt functions; its single argument must be an
   integer constant (the trapa number used to exit).  */
7032 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7035 if (TREE_CODE (*node) != FUNCTION_DECL)
7037 warning ("`%s' attribute only applies to functions",
7038 IDENTIFIER_POINTER (name));
7039 *no_add_attrs = true;
7041 else if (!pragma_interrupt)
7043 /* The trap_exit attribute only has meaning for interrupt functions. */
7044 warning ("`%s' attribute only applies to interrupt functions",
7045 IDENTIFIER_POINTER (name));
7046 *no_add_attrs = true;
7048 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7050 /* The argument must be a constant integer. */
7051 warning ("`%s' attribute argument not an integer constant",
7052 IDENTIFIER_POINTER (name));
7053 *no_add_attrs = true;
7057 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
/* "renesas" attribute: nothing to validate here; it is checked via
   sh_attr_renesas_p at use sites.  */
7064 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7065 tree name ATTRIBUTE_UNUSED,
7066 tree args ATTRIBUTE_UNUSED,
7067 int flags ATTRIBUTE_UNUSED,
7068 bool *no_add_attrs ATTRIBUTE_UNUSED)
7073 /* True if __attribute__((renesas)) or -mrenesas. */
7075 sh_attr_renesas_p (tree td)
7082 td = TREE_TYPE (td);
7083 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7087 /* True if __attribute__((renesas)) or -mrenesas, for the current
7090 sh_cfun_attr_renesas_p (void)
7092 return sh_attr_renesas_p (current_function_decl);
/* True if the current function carries the "interrupt_handler"
   attribute.  */
7096 sh_cfun_interrupt_handler_p (void)
7098 return (lookup_attribute ("interrupt_handler",
7099 DECL_ATTRIBUTES (current_function_decl))
7103 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7106 const char *const name;
7108 const char *const description;
7110 sh_target_switches[] = TARGET_SWITCHES;
7111 #define target_switches sh_target_switches
7113 /* Like default_pch_valid_p, but take flag_mask into account. */
/* PCH validity hook: DATA_P/LEN is the blob saved when the header was
   precompiled (flag_pic, flag_pie, target_flags, then the string
   options).  Returns NULL if compatible, else a message naming the
   mismatched option.  */
7115 sh_pch_valid_p (const void *data_p, size_t len)
7117 const char *data = (const char *)data_p;
7118 const char *flag_that_differs = NULL;
/* Only these target_flags bits invalidate a PCH.  */
7122 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7123 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7125 /* -fpic and -fpie also usually make a PCH invalid. */
7126 if (data[0] != flag_pic)
7127 return _("created and used with different settings of -fpic");
7128 if (data[1] != flag_pie)
7129 return _("created and used with different settings of -fpie");
7132 /* Check target_flags. */
7133 memcpy (&old_flags, data, sizeof (target_flags));
7134 if (((old_flags ^ target_flags) & flag_mask) != 0)
/* Find which -m switch is responsible for the differing bits.  */
7136 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7140 bits = target_switches[i].value;
7144 if ((target_flags & bits) != (old_flags & bits))
7146 flag_that_differs = target_switches[i].name;
7152 data += sizeof (target_flags);
7153 len -= sizeof (target_flags);
7155 /* Check string options. */
7156 #ifdef TARGET_OPTIONS
7157 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7159 const char *str = *target_options[i].variable;
7163 l = strlen (str) + 1;
7164 if (len < l || memcmp (data, str, l) != 0)
7166 flag_that_differs = target_options[i].prefix;
7179 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7182 return _("out of memory");
7187 /* Predicates used by the templates. */
7189 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7190 Used only in general_movsrc_operand. */
7193 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7205 /* Returns 1 if OP can be source of a simple move operation.
7206 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7207 invalid as are subregs of system registers. */
7210 general_movsrc_operand (rtx op, enum machine_mode mode)
7212 if (GET_CODE (op) == MEM)
7214 rtx inside = XEXP (op, 0);
/* Strip a CONST wrapper so label-relative addresses are recognized.  */
7215 if (GET_CODE (inside) == CONST)
7216 inside = XEXP (inside, 0);
7218 if (GET_CODE (inside) == LABEL_REF)
/* label + offset is also an acceptable PC-relative source.  */
7221 if (GET_CODE (inside) == PLUS
7222 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7223 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7226 /* Only post inc allowed. */
7227 if (GET_CODE (inside) == PRE_DEC)
/* Narrow subregs of MACL/MACH/PR cannot be loaded directly.  */
7231 if ((mode == QImode || mode == HImode)
7232 && (GET_CODE (op) == SUBREG
7233 && GET_CODE (XEXP (op, 0)) == REG
7234 && system_reg_operand (XEXP (op, 0), mode)))
7237 return general_operand (op, mode);
7240 /* Returns 1 if OP can be a destination of a move.
7241 Same as general_operand, but no preinc allowed. */
7244 general_movdst_operand (rtx op, enum machine_mode mode)
7246 /* Only pre dec allowed. */
7247 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7250 return general_operand (op, mode);
7253 /* Returns 1 if OP is a normal arithmetic register. */
/* Excludes T, PR, MACH/MACL, branch-target registers, and (except on
   SH4) FPUL.  */
7256 arith_reg_operand (rtx op, enum machine_mode mode)
7258 if (register_operand (op, mode))
7262 if (GET_CODE (op) == REG)
7264 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7265 regno = REGNO (SUBREG_REG (op));
7269 return (regno != T_REG && regno != PR_REG
7270 && ! TARGET_REGISTER_P (regno)
7271 && (regno != FPUL_REG || TARGET_SH4)
7272 && regno != MACH_REG && regno != MACL_REG);
7277 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7278 because this would lead to missing sign extensions when truncating from
7279 DImode to SImode. */
7281 arith_reg_dest (rtx op, enum machine_mode mode)
7283 if (mode == DImode && GET_CODE (op) == SUBREG
7284 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7286 return arith_reg_operand (op, mode);
/* Sub-word integer destination in a general register (post-reload
   check of the actual hard register).  */
7290 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7292 enum machine_mode op_mode = GET_MODE (op);
7294 if (GET_MODE_CLASS (op_mode) != MODE_INT
7295 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7297 if (! reload_completed)
7299 return true_regnum (op) <= LAST_GENERAL_REG;
/* Accepts pseudos and hard FP registers.  */
7303 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7305 if (register_operand (op, mode))
7309 if (GET_CODE (op) == REG)
7311 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7312 regno = REGNO (SUBREG_REG (op));
7316 return (regno >= FIRST_PSEUDO_REGISTER
7317 || FP_REGISTER_P (regno));
7322 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
/* Register, or an immediate that fits the target's add-immediate
   range (I08 for SH1..SH4; see the SHmedia FIXME below).  */
7325 arith_operand (rtx op, enum machine_mode mode)
7327 if (arith_reg_operand (op, mode))
7332 /* FIXME: We should be checking whether the CONST_INT fits in a
7333 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7334 attempting to transform a sequence of two 64-bit sets of the
7335 same register from literal constants into a set and an add,
7336 when the difference is too wide for an add. */
7337 if (GET_CODE (op) == CONST_INT
7338 || EXTRA_CONSTRAINT_C16 (op))
7343 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7349 /* Returns 1 if OP is a valid source operand for a compare insn. */
/* Register or the constant zero (constraint Z).  */
7352 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7354 if (arith_reg_operand (op, mode))
7357 if (EXTRA_CONSTRAINT_Z (op))
7363 /* Return 1 if OP is a valid source operand for an SHmedia operation
7364 that takes either a register or a 6-bit immediate. */
7367 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7369 return (arith_reg_operand (op, mode)
7370 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7373 /* Returns 1 if OP is a valid source operand for a logical operation. */
/* Register, or an immediate in the I10 (SHmedia) / K08 range.  */
7376 logical_operand (rtx op, enum machine_mode mode)
7378 if (arith_reg_operand (op, mode))
7383 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7388 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
/* Like logical_operand, but additionally accepts J16 masks usable by
   the SHmedia mshflo.l / mshflhi.l idiom.  */
7395 and_operand (rtx op, enum machine_mode mode)
7397 if (logical_operand (op, mode))
7400 /* Check mshflo.l / mshflhi.l opportunities. */
7403 && GET_CODE (op) == CONST_INT
7404 && CONST_OK_FOR_J16 (INTVAL (op))
7410 /* Nonzero if OP is a floating point value with value 0.0. */
/* Only SFmode +0.0 qualifies (-0.0 is explicitly excluded, since it
   is not bit-identical to fldi0's result).  */
7413 fp_zero_operand (rtx op)
7417 if (GET_MODE (op) != SFmode)
7420 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7421 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7424 /* Nonzero if OP is a floating point value with value 1.0. */
7427 fp_one_operand (rtx op)
7431 if (GET_MODE (op) != SFmode)
7434 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7435 return REAL_VALUES_EQUAL (r, dconst1);
7438 /* For -m4 and -m4-single-only, mode switching is used. If we are
7439 compiling without -mfmovd, movsf_ie isn't taken into account for
7440 mode switching. We could check in machine_dependent_reorg for
7441 cases where we know we are in single precision mode, but there is no
7442 interface to find that out during reload, so we must avoid
7443 choosing an fldi alternative during reload and thus failing to
7444 allocate a scratch register for the constant loading. */
7448 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
/* Reload predicate: MEM, or (on SH4) a CONST_DOUBLE, needs a tertiary
   reload.  */
7452 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7454 enum rtx_code code = GET_CODE (op);
7455 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Matches the FPSCR hard register in PSImode only.  */
7459 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7461 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7462 && GET_MODE (op) == PSImode);
/* Matches FPUL (or a pseudo that may end up there); for other modes
   defers to fp_arith_reg_operand.  */
7466 fpul_operand (rtx op, enum machine_mode mode)
7469 return fp_arith_reg_operand (op, mode);
7471 return (GET_CODE (op) == REG
7472 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7473 && GET_MODE (op) == mode);
/* Matches any SYMBOL_REF.  */
7477 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7479 return (GET_CODE (op) == SYMBOL_REF);
7482 /* Return the TLS type for TLS symbols, 0 otherwise. */
7484 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7486 if (GET_CODE (op) != SYMBOL_REF)
7488 return SYMBOL_REF_TLS_MODEL (op);
/* The operator predicates below classify an rtx operator by code once
   its mode matches; the accepted codes are in the (elided) switch
   bodies.  */
7492 commutative_float_operator (rtx op, enum machine_mode mode)
7494 if (GET_MODE (op) != mode)
7496 switch (GET_CODE (op))
7508 noncommutative_float_operator (rtx op, enum machine_mode mode)
7510 if (GET_MODE (op) != mode)
7512 switch (GET_CODE (op))
7524 unary_float_operator (rtx op, enum machine_mode mode)
7526 if (GET_MODE (op) != mode)
7528 switch (GET_CODE (op))
7541 binary_float_operator (rtx op, enum machine_mode mode)
7543 if (GET_MODE (op) != mode)
7545 switch (GET_CODE (op))
7559 binary_logical_operator (rtx op, enum machine_mode mode)
7561 if (GET_MODE (op) != mode)
7563 switch (GET_CODE (op))
/* EQ or NE comparison in the requested (or VOID) mode.  */
7576 equality_comparison_operator (rtx op, enum machine_mode mode)
7578 return ((mode == VOIDmode || GET_MODE (op) == mode)
7579 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7583 greater_comparison_operator (rtx op, enum machine_mode mode)
7585 if (mode != VOIDmode && GET_MODE (op) == mode)
7587 switch (GET_CODE (op))
7600 less_comparison_operator (rtx op, enum machine_mode mode)
7602 if (mode != VOIDmode && GET_MODE (op) == mode)
7604 switch (GET_CODE (op))
/* NOTE(review): elided listing; several guard lines and returns are
   missing between the visible statements.  Comments only.  */
7616 /* Accept pseudos and branch target registers. */
7618 target_reg_operand (rtx op, enum machine_mode mode)
7621 || GET_MODE (op) != DImode)
7624 if (GET_CODE (op) == SUBREG)
7627 if (GET_CODE (op) != REG)
7630 /* We must protect ourselves from matching pseudos that are virtual
7631 register, because they will eventually be replaced with hardware
7632 registers that aren't branch-target registers. */
7633 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7634 || TARGET_REGISTER_P (REGNO (op)))
7640 /* Same as target_reg_operand, except that label_refs and symbol_refs
7641 are accepted before reload. */
7643 target_operand (rtx op, enum machine_mode mode)
/* Before reload, also accept anything matching the Csy constraint
   (symbols/labels); afterwards fall back to target_reg_operand.  */
7648 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7649 && EXTRA_CONSTRAINT_Csy (op))
7650 return ! reload_completed;
7652 return target_reg_operand (op, mode);
/* Predicate: OP is a CONST_INT that is a byte-aligned bit offset in
   [8, 56], as required by the SHmedia mextr instructions.  */
7656 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7660 if (GET_CODE (op) != CONST_INT)
7663 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
/* Accept an arith reg, or a TRUNCATE of one (condition elided at 7670).  */
7667 extend_reg_operand (rtx op, enum machine_mode mode)
7669 return (GET_CODE (op) == TRUNCATE
7671 : arith_reg_operand) (op, mode);
/* Like extend_reg_operand, but only for modes whose high part a
   truncation can come from (SI/DI/V4HI/V2SI).  */
7675 trunc_hi_operand (rtx op, enum machine_mode mode)
7677 enum machine_mode op_mode = GET_MODE (op);
7679 if (op_mode != SImode && op_mode != DImode
7680 && op_mode != V4HImode && op_mode != V2SImode)
7682 return extend_reg_operand (op, mode);
/* As extend_reg_operand, but also allowing constant zero.  */
7686 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7688 return (GET_CODE (op) == TRUNCATE
7690 : arith_reg_or_0_operand) (op, mode);
/* As extend_reg_operand, but allowing any nonimmediate operand.  */
7694 general_extend_operand (rtx op, enum machine_mode mode)
7696 return (GET_CODE (op) == TRUNCATE
7698 : nonimmediate_operand) (op, mode);
/* Predicate for QImode/HImode input reloads: a TRUNCATE of an FP
   hard register (intermediate steps elided at 7705-7706).  */
7702 inqhi_operand (rtx op, enum machine_mode mode)
7704 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7707 /* Can't use true_regnum here because copy_cost wants to know about
7708 SECONDARY_INPUT_RELOAD_CLASS. */
7709 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
/* NOTE(review): elided listing; declarations, braces and some returns
   are missing.  Comments only.  */
/* Nonzero if V is a CONST_VECTOR / PARALLEL whose elements all repeat
   (byte-element vectors are checked as repeating pairs).  */
7713 sh_rep_vec (rtx v, enum machine_mode mode)
7718 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7719 || (GET_MODE (v) != mode && mode != VOIDmode))
7721 i = XVECLEN (v, 0) - 2;
7722 x = XVECEXP (v, 0, i + 1);
7723 if (GET_MODE_UNIT_SIZE (mode) == 1)
7725 y = XVECEXP (v, 0, i);
7726 for (i -= 2; i >= 0; i -= 2)
7727 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7728 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7733 if (XVECEXP (v, 0, i) != x)
7738 /* Determine if V is a constant vector matching MODE with only one element
7739 that is not a sign extension. Two byte-sized elements count as one. */
7741 sh_1el_vec (rtx v, enum machine_mode mode)
7744 int i, last, least, sign_ix;
7747 if (GET_CODE (v) != CONST_VECTOR
7748 || (GET_MODE (v) != mode && mode != VOIDmode))
7750 /* Determine numbers of last and of least significant elements. */
7751 last = XVECLEN (v, 0) - 1;
7752 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7753 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
/* For byte vectors the sign element is the one adjacent to the least
   significant; otherwise sign_ix presumably equals least (assignment
   elided) -- TODO confirm.  */
7756 if (GET_MODE_UNIT_SIZE (mode) == 1)
7757 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7758 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7760 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
/* SIGN is all-ones or zero depending on the sign bit of the sign
   element; every other element must equal it.  */
7761 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7762 ? constm1_rtx : const0_rtx);
7763 i = XVECLEN (v, 0) - 1;
7765 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
/* Nonzero if V is a CONST_VECTOR of MODE whose elements are all
   CONST_INTs.  */
7772 sh_const_vec (rtx v, enum machine_mode mode)
7776 if (GET_CODE (v) != CONST_VECTOR
7777 || (GET_MODE (v) != mode && mode != VOIDmode))
7779 i = XVECLEN (v, 0) - 1;
7781 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7786 /* Return the destination address of a branch. */
7789 branch_dest (rtx branch)
7791 rtx dest = SET_SRC (PATTERN (branch));
/* For a conditional branch the label is the then-arm of the
   IF_THEN_ELSE; strip it down to the LABEL_REF's CODE_LABEL and look
   up its recorded insn address.  */
7794 if (GET_CODE (dest) == IF_THEN_ELSE)
7795 dest = XEXP (dest, 1);
7796 dest = XEXP (dest, 0);
7797 dest_uid = INSN_UID (dest);
7798 return INSN_ADDRESSES (dest_uid);
/* NOTE(review): elided listing; braces, returns, and some guard lines
   are missing between the visible statements.  Comments only.  */
7801 /* Return nonzero if REG is not used after INSN.
7802 We assume REG is a reload reg, and therefore does
7803 not live past labels. It may live past calls or jumps though. */
7805 reg_unused_after (rtx reg, rtx insn)
7810 /* If the reg is set by this instruction, then it is safe for our
7811 case. Disregard the case where this is a store to memory, since
7812 we are checking a register used in the store address. */
7813 set = single_set (insn);
7814 if (set && GET_CODE (SET_DEST (set)) != MEM
7815 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
/* Scan forward from INSN until the answer is decided.  */
7818 while ((insn = NEXT_INSN (insn)))
7824 code = GET_CODE (insn);
7827 /* If this is a label that existed before reload, then the register
7828 is dead here. However, if this is a label added by reorg, then
7829 the register may still be live here. We can't tell the difference,
7830 so we just ignore labels completely. */
7831 if (code == CODE_LABEL)
7836 if (code == JUMP_INSN)
7839 /* If this is a sequence, we must handle them all at once.
7840 We could have for instance a call that sets the target register,
7841 and an insn in a delay slot that uses the register. In this case,
7842 we must return 0. */
7843 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7848 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7850 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7851 rtx set = single_set (this_insn);
7853 if (GET_CODE (this_insn) == CALL_INSN)
7855 else if (GET_CODE (this_insn) == JUMP_INSN)
7857 if (INSN_ANNULLED_BRANCH_P (this_insn))
/* A use of REG as a source means it is live; a non-MEM set of REG
   kills it (decisions between these lines are elided).  */
7862 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7864 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7866 if (GET_CODE (SET_DEST (set)) != MEM)
7872 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7877 else if (code == JUMP_INSN)
/* Same source/dest analysis for an ordinary insn.  */
7881 set = single_set (insn);
7882 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7884 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7885 return GET_CODE (SET_DEST (set)) != MEM;
7886 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
/* A call clobbers call-used registers, so REG is dead past it.  */
7889 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
/* Cached RTL for the FPSCR hard register; GC-rooted via GTY.  */
7897 static GTY(()) rtx fpscr_rtx;
/* Return (creating on first use) the PSImode REG rtx for FPSCR.
   The guard around the creation (line 7900-7902) is elided.  */
7899 get_fpscr_rtx (void)
7903 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7904 REG_USERVAR_P (fpscr_rtx) = 1;
7905 mark_user_reg (fpscr_rtx);
7907 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7908 mark_user_reg (fpscr_rtx);
/* Emit PAT as a single-precision (resp. double-precision) FP insn;
   bodies elided in this listing.  */
7913 emit_sf_insn (rtx pat)
7919 emit_df_insn (rtx pat)
/* Wrappers that expand unary/binary SFmode/DFmode operations, passing
   the FPSCR rtx as the extra operand each SH FP pattern requires.  */
7925 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7927 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7931 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7933 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7938 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7940 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7944 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7946 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7950 /* ??? gcc does flow analysis strictly after common subexpression
7951 elimination. As a result, common subexpression elimination fails
7952 when there are some intervening statements setting the same register.
7953 If we did nothing about this, this would hurt the precision switching
7954 for SH4 badly. There is some cse after reload, but it is unable to
7955 undo the extra register pressure from the unused instructions, and
7956 it cannot remove auto-increment loads.
7958 A C code example that shows this flow/cse weakness for (at least) SH
7959 and sparc (as of gcc ss-970706) is this:
7973 So we add another pass before common subexpression elimination, to
7974 remove assignments that are dead due to a following assignment in the
7975 same basic block. */
/* Recursively clear REG_SET_BLOCK entries for every register used in X,
   so a prior set of that register is known to be live.  (Braces and some
   case labels are elided in this listing.)  */
7978 mark_use (rtx x, rtx *reg_set_block)
7984 code = GET_CODE (x);
/* REG case: clear the entry for each hard/pseudo register covered.  */
7989 int regno = REGNO (x);
7990 int nregs = (regno < FIRST_PSEUDO_REGISTER
7991 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7995 reg_set_block[regno + nregs - 1] = 0;
/* SET case: a non-REG destination (e.g. MEM) counts as a use of its
   address; the source is always a use.  */
8002 rtx dest = SET_DEST (x);
8004 if (GET_CODE (dest) == SUBREG)
8005 dest = SUBREG_REG (dest);
8006 if (GET_CODE (dest) != REG)
8007 mark_use (dest, reg_set_block);
8008 mark_use (SET_SRC (x), reg_set_block);
/* Default case: walk all sub-rtxes.  */
8015 const char *fmt = GET_RTX_FORMAT (code);
8017 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8020 mark_use (XEXP (x, i), reg_set_block);
8021 else if (fmt[i] == 'E')
8022 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8023 mark_use (XVECEXP (x, i, j), reg_set_block);
8030 static rtx get_free_reg (HARD_REG_SET);
8032 /* This function returns a register to use to load the address to load
8033 the fpscr from. Currently it always returns r1 or r7, but when we are
8034 able to use pseudo registers after combine, or have a better mechanism
8035 for choosing a register, it should be done here. */
8036 /* REGS_LIVE is the liveness information for the point for which we
8037 need this allocation. In some bare-bones exit blocks, r1 is live at the
8038 start. We can even have all of r0..r3 being live:
8039 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8040 INSN before which new insns are placed with will clobber the register
8041 we return. If a basic block consists only of setting the return value
8042 register to a pseudo and using that register, the return value is not
8043 live before or after this block, yet we'll insert our insns right in
8047 get_free_reg (HARD_REG_SET regs_live)
8049 if (! TEST_HARD_REG_BIT (regs_live, 1))
8050 return gen_rtx_REG (Pmode, 1);
8052 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8053 there shouldn't be anything but a jump before the function end. */
8054 if (! TEST_HARD_REG_BIT (regs_live, 7))
8055 return gen_rtx_REG (Pmode, 7);
8060 /* This function will set the fpscr from memory.
8061 MODE is the mode we are setting it to. */
8063 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8065 enum attr_fp_mode fp_mode = mode;
/* Pick a free address register, then emit the fpu_switch pattern that
   matches whether MODE is the function's normal FP mode.  */
8066 rtx addr_reg = get_free_reg (regs_live);
8068 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8069 emit_insn (gen_fpu_switch1 (addr_reg));
8071 emit_insn (gen_fpu_switch0 (addr_reg));
8074 /* Is the given character a logical line separator for the assembler? */
8075 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8076 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
/* Return the number of extra bytes INSN will occupy beyond its encoded
   length (elided returns between the visible conditions supply the
   actual byte counts).  */
8080 sh_insn_length_adjustment (rtx insn)
8082 /* Instructions with unfilled delay slots take up an extra two bytes for
8083 the nop in the delay slot. */
8084 if (((GET_CODE (insn) == INSN
8085 && GET_CODE (PATTERN (insn)) != USE
8086 && GET_CODE (PATTERN (insn)) != CLOBBER)
8087 || GET_CODE (insn) == CALL_INSN
8088 || (GET_CODE (insn) == JUMP_INSN
8089 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8090 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8091 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8092 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES
8095 /* SH2e has a bug that prevents the use of annulled branches, so if
8096 the delay slot is not filled, we'll have to put a NOP in it. */
8097 if (sh_cpu == CPU_SH2E
8098 && GET_CODE (insn) == JUMP_INSN
8099 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8100 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8101 && get_attr_type (insn) == TYPE_CBRANCH
8102 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8105 /* sh-dsp parallel processing insn take four bytes instead of two. */
8107 if (GET_CODE (insn) == INSN)
8110 rtx body = PATTERN (insn);
8111 const char *template;
8113 int maybe_label = 1;
/* Extract the asm text: either an ASM_INPUT string or the template
   of an asm with operands.  */
8115 if (GET_CODE (body) == ASM_INPUT)
8116 template = XSTR (body, 0);
8117 else if (asm_noperands (body) >= 0)
8119 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
/* Scan each asm line past leading whitespace (loop head elided).  */
8128 while (c == ' ' || c == '\t');
8129 /* all sh-dsp parallel-processing insns start with p.
8130 The only non-ppi sh insn starting with p is pref.
8131 The only ppi starting with pr is prnd. */
8132 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8134 /* The repeat pseudo-insn expands to three insns, a total of
8135 six bytes in size. */
8136 else if ((c == 'r' || c == 'R')
8137 && ! strncasecmp ("epeat", template, 5))
8139 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8141 /* If this is a label, it is obviously not a ppi insn. */
8142 if (c == ':' && maybe_label)
8147 else if (c == '\'' || c == '"')
8152 maybe_label = c != ':';
8160 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8161 isn't protected by a PIC unspec. */
8163 nonpic_symbol_mentioned_p (rtx x)
8165 register const char *fmt;
8168 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8169 || GET_CODE (x) == PC)
8172 /* We don't want to look into the possible MEM location of a
8173 CONST_DOUBLE, since we're not going to use it, in general. */
8174 if (GET_CODE (x) == CONST_DOUBLE)
/* A PIC-related UNSPEC already protects its symbol.  */
8177 if (GET_CODE (x) == UNSPEC
8178 && (XINT (x, 1) == UNSPEC_PIC
8179 || XINT (x, 1) == UNSPEC_GOT
8180 || XINT (x, 1) == UNSPEC_GOTOFF
8181 || XINT (x, 1) == UNSPEC_GOTPLT
8182 || XINT (x, 1) == UNSPEC_GOTTPOFF
8183 || XINT (x, 1) == UNSPEC_DTPOFF
8184 || XINT (x, 1) == UNSPEC_PLT))
/* Otherwise recurse into all sub-rtxes.  */
8187 fmt = GET_RTX_FORMAT (GET_CODE (x));
8188 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8194 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8195 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8198 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8205 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8206 @GOTOFF in `reg'. */
8208 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
/* TLS symbols are handled elsewhere (the return between these lines
   is elided).  */
8211 if (tls_symbolic_operand (orig, Pmode))
/* Local symbols and labels can use the cheaper @GOTOFF form.  */
8214 if (GET_CODE (orig) == LABEL_REF
8215 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8218 reg = gen_reg_rtx (Pmode);
8220 emit_insn (gen_symGOTOFF2reg (reg, orig));
/* Other symbols go through the GOT.  */
8223 else if (GET_CODE (orig) == SYMBOL_REF)
8226 reg = gen_reg_rtx (Pmode);
8228 emit_insn (gen_symGOT2reg (reg, orig));
8234 /* Mark the use of a constant in the literal table. If the constant
8235 has multiple labels, make it unique. */
8237 mark_constant_pool_use (rtx x)
8239 rtx insn, lab, pattern;
/* Early-out cases (elided) for X kinds that need no marking.  */
8244 switch (GET_CODE (x))
8254 /* Get the first label in the list of labels for the same constant
8255 and delete another labels in the list. */
8257 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8259 if (GET_CODE (insn) != CODE_LABEL
8260 || LABEL_REFS (insn) != NEXT_INSN (insn))
/* Delete the duplicate labels found above.  */
8265 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8266 INSN_DELETED_P (insn) = 1;
8268 /* Mark constants in a window. */
8269 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8271 if (GET_CODE (insn) != INSN)
8274 pattern = PATTERN (insn);
8275 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8278 switch (XINT (pattern, 1))
8280 case UNSPECV_CONST2:
8281 case UNSPECV_CONST4:
8282 case UNSPECV_CONST8:
8283 XVECEXP (pattern, 0, 1) = const1_rtx;
8285 case UNSPECV_WINDOW_END:
8286 if (XVECEXP (pattern, 0, 0) == x)
8289 case UNSPECV_CONST_END:
8299 /* Return true if it's possible to redirect BRANCH1 to the destination
8300 of an unconditional jump BRANCH2. We only want to do this if the
8301 resulting branch will have a short displacement. */
8303 sh_can_redirect_branch (rtx branch1, rtx branch2)
8305 if (flag_expensive_optimizations && simplejump_p (branch2))
8307 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
/* NOTE(review): the first loop initializes with NEXT_INSN but steps
   with PREV_INSN -- it is meant to scan backwards for DEST within 256
   bytes; this init/step mismatch looks suspicious and should be
   confirmed against current upstream sources.  */
8311 for (distance = 0, insn = NEXT_INSN (branch1);
8312 insn && distance < 256;
8313 insn = PREV_INSN (insn))
8318 distance += get_attr_length (insn);
/* Second loop: scan forwards for DEST within 256 bytes.  */
8320 for (distance = 0, insn = NEXT_INSN (branch1);
8321 insn && distance < 256;
8322 insn = NEXT_INSN (insn))
8327 distance += get_attr_length (insn);
8333 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8335 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8336 unsigned int new_reg)
8338 /* Interrupt functions can only use registers that have already been
8339 saved by the prologue, even if they would normally be
8342 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
/* NOTE(review): elided listing; the cost assignments / returns between
   most conditions below are missing.  Comments only.  */
8348 /* Function to update the integer COST
8349 based on the relationship between INSN that is dependent on
8350 DEP_INSN through the dependence LINK. The default is to make no
8351 adjustment to COST. This can be used for example to specify to
8352 the scheduler that an output- or anti-dependence does not incur
8353 the same cost as a data-dependence. The return value should be
8354 the new value for COST. */
8356 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8362 /* On SHmedia, if the dependence is an anti-dependence or
8363 output-dependence, there is no cost. */
8364 if (REG_NOTE_KIND (link) != 0)
/* Back-to-back SHmedia multiply-accumulates chain cheaply.  */
8367 if (get_attr_is_mac_media (insn)
8368 && get_attr_is_mac_media (dep_insn))
/* Non-SHmedia path: true (data) dependences only.  */
8371 else if (REG_NOTE_KIND (link) == 0)
8373 enum attr_type dep_type, type;
8375 if (recog_memoized (insn) < 0
8376 || recog_memoized (dep_insn) < 0)
8379 dep_type = get_attr_type (dep_insn);
8380 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8382 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8383 && (type = get_attr_type (insn)) != TYPE_CALL
8384 && type != TYPE_SFUNC)
8387 /* The only input for a call that is timing-critical is the
8388 function's address. */
8389 if (GET_CODE(insn) == CALL_INSN)
8391 rtx call = PATTERN (insn);
8393 if (GET_CODE (call) == PARALLEL)
8394 call = XVECEXP (call, 0 ,0);
8395 if (GET_CODE (call) == SET)
8396 call = SET_SRC (call);
8397 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8398 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8401 /* Likewise, the most timing critical input for an sfuncs call
8402 is the function address. However, sfuncs typically start
8403 using their arguments pretty quickly.
8404 Assume a four cycle delay before they are needed. */
8405 /* All sfunc calls are parallels with at least four components.
8406 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8407 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8408 && XVECLEN (PATTERN (insn), 0) >= 4
8409 && (reg = sfunc_uses_reg (insn)))
8411 if (! reg_set_p (reg, dep_insn))
8414 /* When the preceding instruction loads the shift amount of
8415 the following SHAD/SHLD, the latency of the load is increased
8418 && get_attr_type (insn) == TYPE_DYN_SHIFT
8419 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8420 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8421 XEXP (SET_SRC (single_set (insn)),
8424 /* When an LS group instruction with a latency of less than
8425 3 cycles is followed by a double-precision floating-point
8426 instruction, FIPR, or FTRV, the latency of the first
8427 instruction is increased to 3 cycles. */
8429 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8430 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8432 /* The lsw register of a double-precision computation is ready one
8434 else if (reload_completed
8435 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8436 && (use_pat = single_set (insn))
8437 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8441 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8442 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8445 /* An anti-dependence penalty of two applies if the first insn is a double
8446 precision fadd / fsub / fmul. */
8447 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8448 && recog_memoized (dep_insn) >= 0
8449 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8450 /* A lot of alleged anti-flow dependences are fake,
8451 so check this one is real. */
8452 && flow_dependent_p (dep_insn, insn))
8459 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8460 if DEP_INSN is anti-flow dependent on INSN. */
8462 flow_dependent_p (rtx insn, rtx dep_insn)
8464 rtx tmp = PATTERN (insn);
/* note_stores nulls TMP (via the helper) when a store in DEP_INSN is
   referenced by INSN's pattern.  */
8466 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8467 return tmp == NULL_RTX;
8470 /* A helper function for flow_dependent_p called through note_stores. */
8472 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8474 rtx * pinsn = (rtx *) data;
8476 if (*pinsn && reg_referenced_p (x, *pinsn))
8480 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8481 'special function' patterns (type sfunc) that clobber pr, but that
8482 do not look like function calls to leaf_function_p. Hence we must
8483 do this extra check. */
/* Function header elided in this listing; the body counts sets of the
   PR (link) register appropriate to the target.  */
8487 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8490 /* This Function returns nonzero if the DFA based scheduler interface
8491 is to be used. At present this is only supported properly for the SH4.
8492 For the SH1 the current DFA model is just the converted form of the old
8493 pipeline model description. */
8495 sh_use_dfa_interface (void)
8503 /* This function returns "2" to indicate dual issue for the SH4
8504 processor. To be used by the DFA pipeline description. */
8506 sh_issue_rate (void)
8508 if (TARGET_SUPERSCALAR)
8514 /* Functions for ready queue reordering for sched1. */
8516 /* Get weight for mode for a set x. */
8518 find_set_regmode_weight (rtx x, enum machine_mode mode)
/* Clobbers count as a birth; sets count unless the destination is also
   read in the source (returns between lines elided).  */
8520 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8522 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8524 if (GET_CODE (SET_DEST (x)) == REG)
8526 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8536 /* Get regmode weight for insn. */
8538 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8540 short reg_weight = 0;
8543 /* Increment weight for each register born here. */
8545 reg_weight += find_set_regmode_weight (x, mode);
8546 if (GET_CODE (x) == PARALLEL)
8549 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8551 x = XVECEXP (PATTERN (insn), 0, j);
8552 reg_weight += find_set_regmode_weight (x, mode);
8555 /* Decrement weight for each register that dies here. */
8556 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8558 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8560 rtx note = XEXP (x, 0);
8561 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8568 /* Calculate regmode weights for all insns of a basic block. */
8570 find_regmode_weight (int b, enum machine_mode mode)
8572 rtx insn, next_tail, head, tail;
8574 get_block_head_tail (b, &head, &tail);
8575 next_tail = NEXT_INSN (tail);
8577 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8579 /* Handle register life information. */
/* SFmode weight folds in DFmode (x2); SImode folds in DImode (x2),
   since the wider mode occupies two registers of the narrower class.  */
8584 INSN_REGMODE_WEIGHT (insn, mode) =
8585 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8586 else if (mode == SImode)
8587 INSN_REGMODE_WEIGHT (insn, mode) =
8588 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8592 /* Comparison function for ready queue sorting. */
8594 rank_for_reorder (const void *x, const void *y)
8596 rtx tmp = *(const rtx *) y;
8597 rtx tmp2 = *(const rtx *) x;
8599 /* The insn in a schedule group should be issued the first. */
8600 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8601 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8603 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8604 minimizes instruction movement, thus minimizing sched's effect on
8605 register pressure. */
8606 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8609 /* Resort the array A in which only element at index N may be out of order. */
8611 swap_reorder (rtx *a, int n)
8613 rtx insn = a[n - 1];
/* Insertion step: shift larger-ranked elements up, then place INSN
   (the shift/store lines are elided in this listing).  */
8616 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
/* Macro: reorder READY of length N_READY, using the cheap swap for two
   elements and qsort otherwise.  */
8624 #define SCHED_REORDER(READY, N_READY) \
8627 if ((N_READY) == 2) \
8628 swap_reorder (READY, N_READY); \
8629 else if ((N_READY) > 2) \
8630 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8634 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8637 ready_reorder (rtx *ready, int nready)
8639 SCHED_REORDER (ready, nready);
8642 /* Calculate regmode weights for all insns of all basic block. */
8644 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8645 int verbose ATTRIBUTE_UNUSED,
/* Allocate one weight slot per insn uid for SImode ([0]) and SFmode
   ([1]) and fill them per basic block.  */
8650 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8651 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8653 FOR_EACH_BB_REVERSE (b)
8655 find_regmode_weight (b->index, SImode);
8656 find_regmode_weight (b->index, SFmode);
8659 CURR_REGMODE_PRESSURE (SImode) = 0;
8660 CURR_REGMODE_PRESSURE (SFmode) = 0;
/* Free the weight arrays allocated by sh_md_init_global.  */
8666 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8667 int verbose ATTRIBUTE_UNUSED)
8669 if (regmode_weight[0])
8671 free (regmode_weight[0]);
8672 regmode_weight[0] = NULL;
8674 if (regmode_weight[1])
8676 free (regmode_weight[1]);
8677 regmode_weight[1] = NULL;
8681 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8682 keep count of register pressures on SImode and SFmode. */
8684 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8685 int sched_verbose ATTRIBUTE_UNUSED,
/* USE/CLOBBER markers consume no issue slot.  */
8689 if (GET_CODE (PATTERN (insn)) != USE
8690 && GET_CODE (PATTERN (insn)) != CLOBBER)
8691 cached_can_issue_more = can_issue_more - 1;
8693 cached_can_issue_more = can_issue_more;
8695 if (reload_completed)
8696 return cached_can_issue_more;
8698 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8699 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8701 return cached_can_issue_more;
/* Per-block scheduler init hook: reset both pressure counters.  */
8705 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8706 int verbose ATTRIBUTE_UNUSED,
8707 int veclen ATTRIBUTE_UNUSED)
8709 CURR_REGMODE_PRESSURE (SImode) = 0;
8710 CURR_REGMODE_PRESSURE (SFmode) = 0;
8713 /* Some magic numbers. */
8714 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8715 functions that already have high pressure on r0. */
8716 #define R0_MAX_LIFE_REGIONS 2
8717 #define R0_MAX_LIVE_LENGTH 12
8718 /* Register Pressure thresholds for SImode and SFmode registers. */
8719 #define SIMODE_MAX_WEIGHT 5
8720 #define SFMODE_MAX_WEIGHT 10
8722 /* Return true if the pressure is high for MODE. */
8724 high_pressure (enum machine_mode mode)
8726 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8727 functions that already have high pressure on r0. */
8728 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8729 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
/* Compare the running pressure counter against the per-mode threshold
   (the SFmode/SImode dispatch line is elided).  */
8733 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8735 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8738 /* Reorder ready queue if register pressure is high. */
8740 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8741 int sched_verbose ATTRIBUTE_UNUSED,
8744 int clock_var ATTRIBUTE_UNUSED)
8746 if (reload_completed)
8747 return sh_issue_rate ();
8749 if (high_pressure (SFmode) || high_pressure (SImode))
8751 ready_reorder (ready, *n_readyp);
8754 return sh_issue_rate ();
8757 /* Skip cycles if the current register pressure is high. */
8759 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8760 int sched_verbose ATTRIBUTE_UNUSED,
8761 rtx *ready ATTRIBUTE_UNUSED,
8762 int *n_readyp ATTRIBUTE_UNUSED,
8763 int clock_var ATTRIBUTE_UNUSED)
8765 if (reload_completed)
8766 return cached_can_issue_more;
/* Under pressure, the action taken here (elided at 8769-8770) throttles
   issue for this cycle.  */
8768 if (high_pressure(SFmode) || high_pressure (SImode))
8771 return cached_can_issue_more;
8774 /* Skip cycles without sorting the ready queue. This will move insn from
8775 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8776 queue by sh_reorder. */
8778 /* Generally, skipping these many cycles are sufficient for all insns to move
/* DFA new-cycle hook: limit consecutive skipped cycles to MAX_SKIPS.  */
8783 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8784 int sched_verbose ATTRIBUTE_UNUSED,
8785 rtx insn ATTRIBUTE_UNUSED,
8790 if (reload_completed)
8795 if ((clock_var - last_clock_var) < MAX_SKIPS)
8800 /* If this is the last cycle we are skipping, allow reordering of R. */
8801 if ((clock_var - last_clock_var) == MAX_SKIPS)
8813 /* SHmedia requires registers for branches, so we can't generate new
8814 branches past reload. */
8816 sh_cannot_modify_jumps_p (void)
8818 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
/* Branch-target register class hook: TARGET_REGS on SHmedia only.  */
8822 sh_target_reg_class (void)
8824 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
/* Whether callee-saved target registers may be optimized at this stage.  */
8828 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8830 return (shmedia_space_reserved_for_target_registers
8831 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
/* Whether to use MS-compatible bitfield layout for RECORD_TYPE.  */
8835 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8837 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8841 On the SH1..SH4, the trampoline looks like
8842 2 0002 D202 mov.l l2,r2
8843 1 0000 D301 mov.l l1,r3
8846 5 0008 00000000 l1: .long area
8847 6 000c 00000000 l2: .long function
8849 SH5 (compact) uses r1 instead of r3 for the static chain. */
8852 /* Emit RTL insns to initialize the variable parts of a trampoline.
8853 FNADDR is an RTX for the address of the function's pure code.
8854 CXT is an RTX for the static chain value for the function. */
8857 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8859 if (TARGET_SHMEDIA64)
8864 rtx movi1 = GEN_INT (0xcc000010);
8865 rtx shori1 = GEN_INT (0xc8000010);
8868 /* The following trampoline works within a +- 128 KB range for cxt:
8869 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8870 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8871 gettr tr1,r1; blink tr0,r63 */
8872 /* Address rounding makes it hard to compute the exact bounds of the
8873 offset for this trampoline, but we have a rather generous offset
8874 range, so frame_offset should do fine as an upper bound. */
8875 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8877 /* ??? could optimize this trampoline initialization
8878 by writing DImode words with two insns each. */
8879 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8880 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8881 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8882 insn = gen_rtx_AND (DImode, insn, mask);
8883 /* Or in ptb/u .,tr1 pattern */
8884 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8885 insn = force_operand (insn, NULL_RTX);
8886 insn = gen_lowpart (SImode, insn);
8887 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8888 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8889 insn = gen_rtx_AND (DImode, insn, mask);
8890 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8891 insn = gen_lowpart (SImode, insn);
8892 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8893 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8894 insn = gen_rtx_AND (DImode, insn, mask);
8895 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8896 insn = gen_lowpart (SImode, insn);
8897 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8898 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8899 insn = gen_rtx_AND (DImode, insn, mask);
8900 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8901 insn = gen_lowpart (SImode, insn);
8902 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8904 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8905 insn = gen_rtx_AND (DImode, insn, mask);
8906 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8907 insn = gen_lowpart (SImode, insn);
8908 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8910 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8911 GEN_INT (0x6bf10600));
8912 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8913 GEN_INT (0x4415fc10));
8914 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8915 GEN_INT (0x4401fff0));
8916 emit_insn (gen_ic_invalidate_line (tramp));
8919 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8920 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8922 tramp_templ = gen_datalabel_ref (tramp_templ);
8923 dst = gen_rtx_MEM (BLKmode, tramp);
8924 src = gen_rtx_MEM (BLKmode, tramp_templ);
8925 set_mem_align (dst, 256);
8926 set_mem_align (src, 64);
8927 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8929 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8931 emit_move_insn (gen_rtx_MEM (Pmode,
8932 plus_constant (tramp,
8934 + GET_MODE_SIZE (Pmode))),
8936 emit_insn (gen_ic_invalidate_line (tramp));
8939 else if (TARGET_SHMEDIA)
8941 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8942 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8943 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8944 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8945 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8946 rotated 10 right, and higher 16 bit of every 32 selected. */
8948 = force_reg (V2HImode, (simplify_gen_subreg
8949 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8950 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8951 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8953 tramp = force_reg (Pmode, tramp);
8954 fnaddr = force_reg (SImode, fnaddr);
8955 cxt = force_reg (SImode, cxt);
8956 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8957 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8959 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8960 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8961 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8962 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8963 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8964 gen_rtx_SUBREG (V2HImode, cxt, 0),
8966 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8967 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8968 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8969 if (TARGET_LITTLE_ENDIAN)
8971 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8972 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8976 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8977 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8979 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8980 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8981 emit_insn (gen_ic_invalidate_line (tramp));
8984 else if (TARGET_SHCOMPACT)
8986 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8989 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8990 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8992 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8993 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8995 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8997 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9001 if (TARGET_USERMODE)
9002 emit_library_call (function_symbol ("__ic_invalidate"),
9003 0, VOIDmode, 1, tramp, SImode);
9005 emit_insn (gen_ic_invalidate_line (tramp));
9009 /* FIXME: This is overly conservative. A SHcompact function that
9010 receives arguments ``by reference'' will have them stored in its
9011 own stack frame, so it must not pass pointers or references to
9012 these arguments to other functions by means of sibling calls. */
/* Decide whether a sibling (tail) call is allowed: rejected when the
   function is a SHcompact function with stack-passed arguments, or when
   it is an interrupt handler.  Presumably this implements the
   TARGET_FUNCTION_OK_FOR_SIBCALL hook -- TODO confirm against the hook
   table elsewhere in the file.
   NOTE(review): interior lines are missing from this extraction
   (original numbering jumps 9012 -> 9014 -> 9017); the return type,
   braces, and the head of the return expression are not visible.  */
9014 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9017 && (! TARGET_SHCOMPACT
9018 || current_function_args_info.stack_regs == 0)
9019 && ! sh_cfun_interrupt_handler_p ());
9022 /* Machine specific built-in functions. */
/* Descriptor for one machine-specific built-in function: the insn code
   that implements it and its user-visible "__builtin_*" name.
   NOTE(review): the opening brace (line 9025) and the trailing lines
   (presumably a `signature' index field and the closing brace) are
   missing from this extraction; `d->signature' is read later in this
   file, so such a field must exist.  */
9024 struct builtin_description
9026 const enum insn_code icode;
9027 const char *const name;
9031 /* describe number and signedness of arguments; arg[0] == result
9032 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
/* Table of argument signatures shared between built-ins; the SH_BLTIN_*
   macros below are row indices into this table.  Indices below
   SH_BLTIN_NUM_SHARED_SIGNATURES have their function types cached and
   shared in sh_media_init_builtins.
   NOTE(review): the actual initializer rows ({ ... } entries) appear to
   be missing from this extraction -- the original line numbering skips
   one line between consecutive #defines (9034, 9036, 9038, ...).  */
9033 static const char signature_args[][4] =
9035 #define SH_BLTIN_V2SI2 0
9037 #define SH_BLTIN_V4HI2 1
9039 #define SH_BLTIN_V2SI3 2
9041 #define SH_BLTIN_V4HI3 3
9043 #define SH_BLTIN_V8QI3 4
9045 #define SH_BLTIN_MAC_HISI 5
9047 #define SH_BLTIN_SH_HI 6
9049 #define SH_BLTIN_SH_SI 7
9051 #define SH_BLTIN_V4HI2V2SI 8
9053 #define SH_BLTIN_V4HI2V8QI 9
9055 #define SH_BLTIN_SISF 10
9057 #define SH_BLTIN_LDUA_L 11
9059 #define SH_BLTIN_LDUA_Q 12
9061 #define SH_BLTIN_STUA_L 13
9063 #define SH_BLTIN_STUA_Q 14
9065 #define SH_BLTIN_UDI 15
9067 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9068 #define SH_BLTIN_2 16
9069 #define SH_BLTIN_SU 16
9071 #define SH_BLTIN_3 17
9072 #define SH_BLTIN_SUS 17
9074 #define SH_BLTIN_PSSV 18
9076 #define SH_BLTIN_XXUU 19
9077 #define SH_BLTIN_UUUU 19
9079 #define SH_BLTIN_PV 20
9082 /* mcmv: operands considered unsigned. */
9083 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9084 /* mperm: control value considered unsigned int. */
9085 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9086 /* mshards_q: returns signed short. */
9087 /* nsb: takes long long arg, returns unsigned char. */
/* Table of SHmedia built-ins: insn code, "__builtin_*" name, and a
   signature index into signature_args.  Entries that share a name
   (ALLOCO, LDHI/LDLO/STHI/STLO, PREFO) pair a 32-bit insn variant with
   a 64-bit one; which one is registered is presumably decided by mode
   checks in sh_media_init_builtins -- TODO confirm.
   NOTE(review): the opening brace (line 9089) and a few interior lines
   (9097, 9100, 9155, 9172, 9175, 9178-...) are missing from this
   extraction.  */
9088 static const struct builtin_description bdesc[] =
9090 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9091 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9092 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9093 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9094 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9095 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9096 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9098 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9099 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9101 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9102 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9103 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9104 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9105 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9106 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9107 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9108 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9109 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9110 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9111 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9112 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9113 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9114 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9115 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9116 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9117 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9118 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9119 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9120 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9121 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9122 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9123 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9124 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9125 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9126 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9127 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9128 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9129 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9130 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9131 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9132 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9133 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9134 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9135 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9136 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9137 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9138 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9139 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9140 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9141 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9142 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9143 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9144 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9145 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9146 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9147 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9148 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9149 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9150 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9151 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9152 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9153 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9154 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9156 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9157 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9158 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9159 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9160 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9161 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9162 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9163 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9164 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9165 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9166 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9167 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9168 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9169 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9170 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9171 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9173 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9174 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9176 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9177 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
/* Register every entry of bdesc[] with the front end.  For each built-in,
   a function type is built from its signature_args row (argument i's mode
   taken from insn_data operand modes; 8 means pointer, 0 ends the list)
   and passed to builtin_function with the bdesc index as the function
   code.  Types for signatures below SH_BLTIN_NUM_SHARED_SIGNATURES are
   cached in `shared' and reused.
   NOTE(review): many interior lines are missing from this extraction
   (braces, `continue'/`else' lines, the loop header around 9207-9208,
   and the tail of the builtin_function call), so control flow shown here
   is incomplete.  */
9182 sh_media_init_builtins (void)
9184 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9185 const struct builtin_description *d;
9187 memset (shared, 0, sizeof shared);
9188 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9190 tree type, arg_type;
9191 int signature = d->signature;
/* Reuse the cached function type when this signature was seen before.  */
9194 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9195 type = shared[signature];
9198 int has_result = signature_args[signature][0] != 0;
/* Skip pointer-argument built-ins whose insn operand is not Pmode
   (selects between the 32-bit and 64-bit variants in bdesc).  */
9200 if (signature_args[signature][1] == 8
9201 && (insn_data[d->icode].operand[has_result].mode != Pmode))
/* Skip floating-point built-ins when no FPU is available.  */
9203 if (! TARGET_FPU_ANY
9204 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9206 type = void_list_node;
9209 int arg = signature_args[signature][i];
9210 int opno = i - 1 + has_result;
9213 arg_type = ptr_type_node;
9215 arg_type = ((*lang_hooks.types.type_for_mode)
9216 (insn_data[d->icode].operand[opno].mode,
9221 arg_type = void_type_node;
9224 type = tree_cons (NULL_TREE, arg_type, type);
9226 type = build_function_type (arg_type, type);
9227 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9228 shared[signature] = type;
9230 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
/* Target hook: initialize SH machine-specific built-ins.  Delegates to
   sh_media_init_builtins; presumably guarded by a TARGET_SHMEDIA check
   on a line missing from this extraction (9238) -- TODO confirm.  */
9236 sh_init_builtins (void)
9239 sh_media_init_builtins ();
9242 /* Expand an expression EXP that calls a built-in function,
9243 with result going to TARGET if that's convenient
9244 (and in mode MODE if that's convenient).
9245 SUBTARGET may be used as the target for computing one of EXP's operands.
9246 IGNORE is nonzero if the value is to be ignored. */
/* Looks up the built-in's bdesc entry by DECL_FUNCTION_CODE, expands up
   to three arguments into insn operands (coercing each argument to the
   operand's mode and forcing it into a register when the operand
   predicate rejects it), then emits the pattern via the insn's genfun.
   NOTE(review): interior lines are missing from this extraction (braces,
   the `op'/`i'/`nop' declarations, the result-handling around 9264-9277,
   and the switch dispatching on operand count near 9298-9311), so only
   the surviving skeleton is shown.  */
9249 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9250 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9252 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9253 tree arglist = TREE_OPERAND (exp, 1);
9254 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9255 const struct builtin_description *d = &bdesc[fcode];
9256 enum insn_code icode = d->icode;
9257 int signature = d->signature;
9258 enum machine_mode tmode = VOIDmode;
9263 if (signature_args[signature][0])
/* Ensure TARGET is a register of the insn's result mode.  */
9268 tmode = insn_data[icode].operand[0].mode;
9270 || GET_MODE (target) != tmode
9271 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9272 target = gen_reg_rtx (tmode);
9278 for (i = 1; i <= 3; i++, nop++)
9281 enum machine_mode opmode, argmode;
9283 if (! signature_args[signature][i])
9285 arg = TREE_VALUE (arglist);
9286 if (arg == error_mark_node)
9288 arglist = TREE_CHAIN (arglist);
9289 opmode = insn_data[icode].operand[nop].mode;
9290 argmode = TYPE_MODE (TREE_TYPE (arg));
/* Convert the argument to the operand's mode before expansion.  */
9291 if (argmode != opmode)
9292 arg = build1 (NOP_EXPR,
9293 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9294 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9295 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9296 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* Emit the pattern with 1..4 operands depending on the signature.  */
9302 pat = (*insn_data[d->icode].genfun) (op[0]);
9305 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9308 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9311 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Expand a unary V2SF operation CODE (OP0 := CODE OP1) as two SFmode
   lane operations via gen_unary_sf_op: once selecting lane 0 and once
   selecting lane 1.
   NOTE(review): the return-type line, braces, and a blank line are
   missing from this extraction (9322, 9324, 9329, 9332).  */
9323 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9325 rtx sel0 = const0_rtx;
9326 rtx sel1 = const1_rtx;
9327 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9328 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9330 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9331 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Expand a binary V2SF operation CODE (OP0 := OP1 CODE OP2) as two
   SFmode lane operations, once per lane, with the selector arguments
   swapped between the two emissions.
   NOTE(review): the return-type line, braces, the continuation of the
   `fn' initializer (9340), and a blank line are missing from this
   extraction.  */
9335 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9337 rtx sel0 = const0_rtx;
9338 rtx sel1 = const1_rtx;
9339 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9341 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9343 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9344 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9347 /* Return the class of registers for which a mode change from FROM to TO
/* ... is invalid (CANNOT_CHANGE_MODE_CLASS implementation).  For
   unequal-size mode changes, reject DF_REGS (little-endian) or
   DF_HI_REGS (big-endian, per the visible else-branch) when a sub-8-byte
   mode is involved.
   NOTE(review): the rest of the leading comment (9348-9349), the return
   type, braces, `return 0;' fall-throughs, and the else keyword are
   missing from this extraction.  */
9350 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9351 enum reg_class class)
9353 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9355 if (TARGET_LITTLE_ENDIAN)
9357 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9358 return reg_classes_intersect_p (DF_REGS, class);
9362 if (GET_MODE_SIZE (from) < 8)
9363 return reg_classes_intersect_p (DF_HI_REGS, class);
9370 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9371 that label is used. */
/* For GOTOFF addresses, first unwrap the UNSPEC (and an optional PLUS)
   to reach the underlying LABEL_REF before bumping LABEL_NUSES.
   NOTE(review): return type, braces, and blank lines are missing from
   this extraction (9372-9373, 9375, 9377, 9383, 9387).  */
9374 sh_mark_label (rtx address, int nuses)
9376 if (GOTOFF_P (address))
9378 /* Extract the label or symbol. */
9379 address = XEXP (address, 0);
9380 if (GET_CODE (address) == PLUS)
9381 address = XEXP (address, 0);
9382 address = XVECEXP (address, 0, 0);
9384 if (GET_CODE (address) == LABEL_REF
9385 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9386 LABEL_NUSES (XEXP (address, 0)) += nuses;
9389 /* Compute extra cost of moving data between one register class
/* ... and another (REGISTER_MOVE_COST implementation).  */
9392 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9393 uses this information. Hence, the general register <-> floating point
9394 register information here is not used for SFmode. */
/* Each `if' below matches a source/destination class pair and the
   corresponding returns give its relative cost; the final line is the
   default cost.  NOTE(review): most of the `return N;' lines between the
   conditions are missing from this extraction (e.g. 9401, 9404, 9409,
   9413, ...), so the actual cost constants for those pairs are not
   visible here.  */
9397 sh_register_move_cost (enum machine_mode mode,
9398 enum reg_class srcclass, enum reg_class dstclass)
9400 if (dstclass == T_REGS || dstclass == PR_REGS)
9403 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9406 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9407 && REGCLASS_HAS_FP_REG (srcclass)
9408 && REGCLASS_HAS_FP_REG (dstclass))
9411 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9412 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9415 if ((REGCLASS_HAS_FP_REG (dstclass)
9416 && REGCLASS_HAS_GENERAL_REG (srcclass))
9417 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9418 && REGCLASS_HAS_FP_REG (srcclass)))
/* FP <-> general moves scale with the number of 8-byte chunks.  */
9419 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9420 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9422 if ((dstclass == FPUL_REGS
9423 && REGCLASS_HAS_GENERAL_REG (srcclass))
9424 || (srcclass == FPUL_REGS
9425 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9428 if ((dstclass == FPUL_REGS
9429 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9430 || (srcclass == FPUL_REGS
9431 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9434 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9435 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9438 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9439 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9444 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9445 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9446 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Default cost: two per 4-byte chunk.  */
9448 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9451 /* Like register_operand, but take into account that SHMEDIA can use
9452 the constant zero like a general register. */
/* Predicate: accepts const0 on SHmedia (the `return 1;' for that case is
   presumably on missing line 9457 -- TODO confirm), otherwise defers to
   register_operand.  Return type and braces are also missing from this
   extraction.  */
9454 sh_register_operand (rtx op, enum machine_mode mode)
9456 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9458 return register_operand (op, mode);
/* Predicate for the cmpsi operand: accepts the T register in SImode as a
   special case (its accepting `return' is presumably on missing line
   9466 -- TODO confirm), otherwise defers to arith_operand.  Return type
   and braces are missing from this extraction.  */
9462 cmpsi_operand (rtx op, enum machine_mode mode)
9464 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9465 && GET_MODE (op) == SImode)
9467 return arith_operand (op, mode);
9470 static rtx emit_load_ptr (rtx, rtx);
/* Load a pointer-sized value from memory at ADDR into REG, sign-extending
   from ptr_mode to Pmode when the two differ (SH5 32-bit pointers in
   64-bit registers).  Returns the emitted move insn.
   NOTE(review): the `static rtx' definition line and braces are missing
   from this extraction (9472, 9474, 9480).  */
9473 emit_load_ptr (rtx reg, rtx addr)
9475 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9477 if (Pmode != ptr_mode)
9478 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9479 return emit_move_insn (reg, mem);
/* Output the assembler code for a thunk function (ASM_OUTPUT_MI_THUNK
   style hook): adjust the incoming `this' pointer by DELTA (and, when
   VCALL_OFFSET is used, by an offset loaded through the vtable), then
   tail-call FUNCTION.  The thunk is compiled here by running a minimal
   slice of rest_of_compilation directly.
   NOTE(review): numerous interior lines are missing from this extraction
   (return type, braces, several declarations such as `function' /
   `offset_addr' / `val', and various `if'/`else' heads), so the control
   flow shown is incomplete.  */
9483 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9484 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9487 CUMULATIVE_ARGS cum;
9488 int structure_value_byref = 0;
9489 rtx this, this_value, sibcall, insns, funexp;
9490 tree funtype = TREE_TYPE (function);
9491 int simple_add = CONST_OK_FOR_ADD (delta);
9493 rtx scratch0, scratch1, scratch2;
/* Pretend reload and the epilogue already ran: we emit post-reload RTL
   with hard registers directly.  */
9495 reload_completed = 1;
9496 epilogue_completed = 1;
9498 current_function_uses_only_leaf_regs = 1;
9499 reset_block_changes ();
9501 emit_note (NOTE_INSN_PROLOGUE_END);
9503 /* Find the "this" pointer. We have such a wide range of ABIs for the
9504 SH that it's best to do this completely machine independently.
9505 "this" is passed as first argument, unless a structure return pointer
9506 comes first, in which case "this" comes second. */
9507 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9508 #ifndef PCC_STATIC_STRUCT_RETURN
9509 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9510 structure_value_byref = 1;
9511 #endif /* not PCC_STATIC_STRUCT_RETURN */
9512 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9514 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9516 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9518 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9520 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9521 static chain pointer (even if you can't have nested virtual functions
9522 right now, someone might implement them sometime), and the rest of the
9523 registers are used for argument passing, are callee-saved, or reserved. */
9524 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9527 scratch1 = gen_rtx_REG (ptr_mode, 1);
9528 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9529 pointing where to return struct values. */
9530 scratch2 = gen_rtx_REG (Pmode, 3);
9532 else if (TARGET_SHMEDIA)
9534 scratch1 = gen_rtx_REG (ptr_mode, 21);
9535 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
/* Apply the constant DELTA adjustment to `this'.  */
9538 this_value = plus_constant (this, delta);
9540 && (simple_add || scratch0 != scratch1)
9541 && strict_memory_address_p (ptr_mode, this_value))
9543 emit_load_ptr (scratch0, this_value);
9549 else if (simple_add)
9550 emit_move_insn (this, this_value);
9553 emit_move_insn (scratch1, GEN_INT (delta));
9554 emit_insn (gen_add2_insn (this, scratch1));
/* VCALL_OFFSET path: load *this, then the vtable slot at that offset,
   and add the loaded adjustment to `this'.  */
9562 emit_load_ptr (scratch0, this);
9564 offset_addr = plus_constant (scratch0, vcall_offset);
9565 if (strict_memory_address_p (ptr_mode, offset_addr))
9567 else if (! TARGET_SH5)
9569 /* scratch0 != scratch1, and we have indexed loads. Get better
9570 schedule by loading the offset into r1 and using an indexed
9571 load - then the load of r1 can issue before the load from
9572 (this + delta) finishes. */
9573 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9574 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9576 else if (CONST_OK_FOR_ADD (vcall_offset))
9578 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9579 offset_addr = scratch0;
9581 else if (scratch0 != scratch1)
9583 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9584 emit_insn (gen_add2_insn (scratch0, scratch1));
9585 offset_addr = scratch0;
9588 abort (); /* FIXME */
9589 emit_load_ptr (scratch0, offset_addr);
9591 if (Pmode != ptr_mode)
9592 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9593 emit_insn (gen_add2_insn (this, scratch0));
9596 /* Generate a tail call to the target function. */
9597 if (! TREE_USED (function))
9599 assemble_external (function);
9600 TREE_USED (function) = 1;
9602 funexp = XEXP (DECL_RTL (function), 0);
9603 emit_move_insn (scratch2, funexp);
9604 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9605 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9606 SIBLING_CALL_P (sibcall) = 1;
9607 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9610 /* Run just enough of rest_of_compilation to do scheduling and get
9611 the insns emitted. Note that use_thunk calls
9612 assemble_start_function and assemble_end_function. */
9614 insn_locators_initialize ();
9615 insns = get_insns ();
9617 if (optimize > 0 && flag_schedule_insns_after_reload)
9619 find_basic_blocks (insns, max_reg_num (), dump_file);
9620 life_analysis (dump_file, PROP_FINAL);
9622 split_all_insns (1);
9624 schedule_insns (dump_file);
9629 if (optimize > 0 && flag_delayed_branch)
9630 dbr_schedule (insns, dump_file);
9631 shorten_branches (insns);
9632 final_start_function (insns, file, 1);
9633 final (insns, file, 1, 0);
9634 final_end_function ();
9636 if (optimize > 0 && flag_schedule_insns_after_reload)
9638 /* Release all memory allocated by flow. */
9639 free_basic_block_vars ();
9641 /* Release all memory held by regsets now. */
9642 regset_release_memory ();
/* Undo the fake post-reload state set at entry.  */
9645 reload_completed = 0;
9646 epilogue_completed = 0;
/* Build a SYMBOL_REF for NAME and mark it as a function symbol (so it is
   treated as code, not data).  The return type line, braces, and the
   `return sym;' are missing from this extraction.  */
9651 function_symbol (const char *name)
9653 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9654 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9658 /* Find the number of a general purpose register in S. */
/* Scan the general-register range for a register present in hard-reg-set
   S; the matching `return r;' and the not-found return are presumably on
   missing lines (9665 onward) -- TODO confirm.  Return type and braces
   are also missing from this extraction.  */
9660 scavenge_reg (HARD_REG_SET *s)
9663 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9664 if (TEST_HARD_REG_BIT (*s, r))
/* Return an rtx for the incoming value of the PR (return-address)
   register.  On SHcompact, when the prologue is known to clobber PR, a
   memory reference through the return-address pointer is returned
   instead; otherwise the hard-reg initial value is fetched and wrapped
   in an UNSPEC_RA so it can be replaced later if needed.
   NOTE(review): the return type, braces, the `rtx val' declaration
   (9694), and some blank lines are missing from this extraction.  */
9670 sh_get_pr_initial_val (void)
9674 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9675 PR register on SHcompact, because it might be clobbered by the prologue.
9676 We check first if that is known to be the case. */
9677 if (TARGET_SHCOMPACT
9678 && ((current_function_args_info.call_cookie
9679 & ~ CALL_COOKIE_RET_TRAMP (1))
9680 || current_function_has_nonlocal_label))
9681 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9683 /* If we haven't finished rtl generation, there might be a nonlocal label
9684 that we haven't seen yet.
9685 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9686 is set, unless it has been called before for the same register. And even
9687 then, we end in trouble if we didn't use the register in the same
9688 basic block before. So call get_hard_reg_initial_val now and wrap it
9689 in an unspec if we might need to replace it. */
9690 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9691 combine can put the pseudo returned by get_hard_reg_initial_val into
9692 instructions that need a general purpose registers, which will fail to
9693 be recognized when the pseudo becomes allocated to PR. */
9695 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9697 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
/* Expand a store-condition (scc) of the T bit into TARGET for comparison
   CODE against the globals sh_compare_op0/sh_compare_op1.  Only handles
   T_REG compared with a constant: emits movt when the desired value
   equals T, a subc/add sequence when it is T's complement, and a plain
   constant move when T's value is statically known.  Copies through a
   fresh SImode pseudo when TARGET is not a register.
   NOTE(review): the return type, braces, `int val;' declaration,
   `return 0/1;' lines, and the final `return 1;' are missing from this
   extraction, so the success/failure protocol is not fully visible.  */
9702 sh_expand_t_scc (enum rtx_code code, rtx target)
9704 rtx result = target;
9707 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9708 || GET_CODE (sh_compare_op1) != CONST_INT)
9710 if (GET_CODE (result) != REG)
9711 result = gen_reg_rtx (SImode);
9712 val = INTVAL (sh_compare_op1);
9713 if ((code == EQ && val == 1) || (code == NE && val == 0))
9714 emit_insn (gen_movt (result));
9715 else if ((code == EQ && val == 0) || (code == NE && val == 1))
/* result = !T: subc computes 0 - T - T giving 0 or -1... then +1 maps
   T=1 -> 0 and T=0 -> 1.  The clobber tells flow `result' is dead.  */
9717 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9718 emit_insn (gen_subc (result, result, result));
9719 emit_insn (gen_addsi3 (result, result, const1_rtx));
9721 else if (code == EQ || code == NE)
9722 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9725 if (result != target)
9726 emit_move_insn (target, result);
9730 /* INSN is an sfunc; return the rtx that describes the address used. */
/* Walk the PARALLEL body of INSN looking for a (use (reg)) of a general
   register in Pmode -- that register holds the sfunc address.  Failing
   that, for an UNSPEC_VOLATILE body, return its second operand.
   NOTE(review): the return type, braces, the `int len, i;' declaration,
   and the final statement(s) after 9747 are missing from this
   extraction.  */
9732 extract_sfunc_addr (rtx insn)
9734 rtx pattern, part = NULL_RTX;
9737 pattern = PATTERN (insn);
9738 len = XVECLEN (pattern, 0);
9739 for (i = 0; i < len; i++)
9741 part = XVECEXP (pattern, 0, i);
9742 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9743 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9744 return XEXP (part, 0);
9746 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9747 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9751 /* Verify that the register in use_sfunc_addr still agrees with the address
9752 used in the sfunc. This prevents fill_slots_from_thread from changing
9754 INSN is the use_sfunc_addr instruction, and REG is the register it
9757 check_use_sfunc_addr (rtx insn, rtx reg)
9759 /* Search for the sfunc. It should really come right after INSN. */
9760 while ((insn = NEXT_INSN (insn)))
9762 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9764 if (! INSN_P (insn))
9767 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9768 insn = XVECEXP (PATTERN (insn), 0, 0);
9769 if (GET_CODE (PATTERN (insn)) != PARALLEL
9770 || get_attr_type (insn) != TYPE_SFUNC)
9772 return rtx_equal_p (extract_sfunc_addr (insn), reg);