1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
55 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
57 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
58 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
60 /* These are some macros to abstract register modes. */
61 #define CONST_OK_FOR_ADD(size) \
62 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
63 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
64 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
65 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
67 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
68 int current_function_interrupt;
70 /* ??? The pragma interrupt support will not work for SH3. */
71 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
72 output code for the next function appropriate for an interrupt handler. */
75 /* This is set by the trap_exit attribute for functions. It specifies
76 a trap number to be used in a trapa instruction at function exit
77 (instead of an rte instruction). */
80 /* This is used by the sp_switch attribute for functions. It specifies
81 a variable holding the address of the stack the interrupt function
82 should switch to/from at entry/exit. */
85 /* This is set by #pragma trapa, and is similar to the above, except that
86 the compiler doesn't emit code to preserve all registers. */
87 static int pragma_trapa;
89 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
90 which has a separate set of low regs for User and Supervisor modes.
91 This should only be used for the lowest level of interrupts. Higher levels
92 of interrupts must save the registers in case they themselves are
94 int pragma_nosave_low_regs;
96 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
97 sh_expand_prologue. */
98 int current_function_anonymous_args;
100 /* Global variables for machine-dependent things. */
102 /* Which cpu are we scheduling for. */
103 enum processor_type sh_cpu;
105 /* Definitions used in ready queue reordering for first scheduling pass. */
107 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
108 static short *regmode_weight[2];
110 /* Total SFmode and SImode weights of scheduled insns. */
111 static int curr_regmode_pressure[2];
113 /* If true, skip cycles for Q -> R movement. */
114 static int skip_cycles = 0;
116 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
117 and returned from sh_reorder2. */
118 static short cached_can_issue_more;
120 /* Saved operands from the last compare to use when we generate an scc
126 /* Provides the class number of the smallest class containing
129 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
131 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
164 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
165 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
166 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
167 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
168 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 char sh_register_names[FIRST_PSEUDO_REGISTER] \
173 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
175 char sh_additional_register_names[ADDREGNAMES_SIZE] \
176 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
177 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
179 /* Provide reg_class from a letter such as appears in the machine
180 description. *: target independently reserved letter.
181 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
183 enum reg_class reg_class_from_letter[] =
185 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
186 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
187 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
188 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
189 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
190 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
191 /* y */ FPUL_REGS, /* z */ R0_REGS
194 int assembler_dialect;
196 static bool shmedia_space_reserved_for_target_registers;
198 static void split_branches (rtx);
199 static int branch_dest (rtx);
200 static void force_into (rtx, rtx);
201 static void print_slot (rtx);
202 static rtx add_constant (rtx, enum machine_mode, rtx);
203 static void dump_table (rtx, rtx);
204 static int hi_const (rtx);
205 static int broken_move (rtx);
206 static int mova_p (rtx);
207 static rtx find_barrier (int, rtx, rtx);
208 static int noncall_uses_reg (rtx, rtx, rtx *);
209 static rtx gen_block_redirect (rtx, int, int);
210 static void sh_reorg (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
212 static rtx frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static void mark_use (rtx, rtx *);
218 static HOST_WIDE_INT rounded_frame_size (int);
219 static rtx mark_constant_pool_use (rtx);
220 const struct attribute_spec sh_attribute_table[];
221 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
222 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
223 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
225 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
226 static void sh_insert_attributes (tree, tree *);
227 static int sh_adjust_cost (rtx, rtx, rtx, int);
228 static int sh_use_dfa_interface (void);
229 static int sh_issue_rate (void);
230 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
231 static short find_set_regmode_weight (rtx, enum machine_mode);
232 static short find_insn_regmode_weight (rtx, enum machine_mode);
233 static void find_regmode_weight (int, enum machine_mode);
234 static void sh_md_init_global (FILE *, int, int);
235 static void sh_md_finish_global (FILE *, int);
236 static int rank_for_reorder (const void *, const void *);
237 static void swap_reorder (rtx *, int);
238 static void ready_reorder (rtx *, int);
239 static short high_pressure (enum machine_mode);
240 static int sh_reorder (FILE *, int, rtx *, int *, int);
241 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
242 static void sh_md_init (FILE *, int, int);
243 static int sh_variable_issue (FILE *, int, rtx, int);
245 static bool sh_function_ok_for_sibcall (tree, tree);
247 static bool sh_cannot_modify_jumps_p (void);
248 static int sh_target_reg_class (void);
249 static bool sh_optimize_target_register_callee_saved (bool);
250 static bool sh_ms_bitfield_layout_p (tree);
252 static void sh_init_builtins (void);
253 static void sh_media_init_builtins (void);
254 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
255 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
256 static void sh_file_start (void);
257 static int flow_dependent_p (rtx, rtx);
258 static void flow_dependent_p_1 (rtx, rtx, void *);
259 static int shiftcosts (rtx);
260 static int andcosts (rtx);
261 static int addsubcosts (rtx);
262 static int multcosts (rtx);
263 static bool unspec_caller_rtx_p (rtx);
264 static bool sh_cannot_copy_insn_p (rtx);
265 static bool sh_rtx_costs (rtx, int, int, int *);
266 static int sh_address_cost (rtx);
267 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
268 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
269 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
270 static int scavenge_reg (HARD_REG_SET *s);
271 struct save_schedule_s;
272 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
273 struct save_schedule_s *, int);
275 static rtx sh_struct_value_rtx (tree, int);
276 static bool sh_return_in_memory (tree, tree);
277 static rtx sh_builtin_saveregs (void);
278 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
279 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
280 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
281 static tree sh_build_builtin_va_list (void);
284 /* Initialize the GCC target structure. */
285 #undef TARGET_ATTRIBUTE_TABLE
286 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
288 /* The next two are used for debug info when compiling with -gdwarf. */
289 #undef TARGET_ASM_UNALIGNED_HI_OP
290 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
291 #undef TARGET_ASM_UNALIGNED_SI_OP
292 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
294 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
297 #undef TARGET_ASM_ALIGNED_DI_OP
298 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
300 #undef TARGET_ASM_FUNCTION_EPILOGUE
301 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
303 #undef TARGET_ASM_OUTPUT_MI_THUNK
304 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
306 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
309 #undef TARGET_ASM_FILE_START
310 #define TARGET_ASM_FILE_START sh_file_start
311 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
312 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
314 #undef TARGET_INSERT_ATTRIBUTES
315 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
317 #undef TARGET_SCHED_ADJUST_COST
318 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
320 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
321 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
323 #undef TARGET_SCHED_ISSUE_RATE
324 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
326 /* The next 5 hooks have been implemented for reenabling sched1. With the
327 help of these macros we are limiting the movement of insns in sched1 to
328 reduce the register pressure. The overall idea is to keep count of SImode
329 and SFmode regs required by already scheduled insns. When these counts
330 cross some threshold values; give priority to insns that free registers.
331 The insn that frees registers is most likely to be the insn with lowest
332 LUID (original insn order); but such an insn might be there in the stalled
333 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
334 upto a max of 8 cycles so that such insns may move from Q -> R.
336 The description of the hooks are as below:
338 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
339 scheduler; it is called inside the sched_init function just after
340 find_insn_reg_weights function call. It is used to calculate the SImode
341 and SFmode weights of insns of basic blocks; much similar to what
342 find_insn_reg_weights does.
343 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
345 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
346 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
349 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
350 high; reorder the ready queue so that the insn with lowest LUID will be
353 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
354 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
356 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
357 can be returned from TARGET_SCHED_REORDER2.
359 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
361 #undef TARGET_SCHED_DFA_NEW_CYCLE
362 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
364 #undef TARGET_SCHED_INIT_GLOBAL
365 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
367 #undef TARGET_SCHED_FINISH_GLOBAL
368 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
370 #undef TARGET_SCHED_VARIABLE_ISSUE
371 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER sh_reorder
376 #undef TARGET_SCHED_REORDER2
377 #define TARGET_SCHED_REORDER2 sh_reorder2
379 #undef TARGET_SCHED_INIT
380 #define TARGET_SCHED_INIT sh_md_init
382 #undef TARGET_CANNOT_MODIFY_JUMPS_P
383 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
384 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
385 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
386 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
387 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
388 sh_optimize_target_register_callee_saved
390 #undef TARGET_MS_BITFIELD_LAYOUT_P
391 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
393 #undef TARGET_INIT_BUILTINS
394 #define TARGET_INIT_BUILTINS sh_init_builtins
395 #undef TARGET_EXPAND_BUILTIN
396 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
398 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
399 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
401 #undef TARGET_CANNOT_COPY_INSN_P
402 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
403 #undef TARGET_RTX_COSTS
404 #define TARGET_RTX_COSTS sh_rtx_costs
405 #undef TARGET_ADDRESS_COST
406 #define TARGET_ADDRESS_COST sh_address_cost
408 #undef TARGET_MACHINE_DEPENDENT_REORG
409 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
412 #undef TARGET_HAVE_TLS
413 #define TARGET_HAVE_TLS true
416 #undef TARGET_PROMOTE_PROTOTYPES
417 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
418 #undef TARGET_PROMOTE_FUNCTION_ARGS
419 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
420 #undef TARGET_PROMOTE_FUNCTION_RETURN
421 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
423 #undef TARGET_STRUCT_VALUE_RTX
424 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
425 #undef TARGET_RETURN_IN_MEMORY
426 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
428 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
429 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
430 #undef TARGET_SETUP_INCOMING_VARARGS
431 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
432 #undef TARGET_STRICT_ARGUMENT_NAMING
433 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
434 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
435 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_BUILD_BUILTIN_VA_LIST
438 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
440 #undef TARGET_PCH_VALID_P
441 #define TARGET_PCH_VALID_P sh_pch_valid_p
443 /* Return regmode weight for insn. */
444 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
446 /* Return current register pressure for regmode. */
447 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
449 struct gcc_target targetm = TARGET_INITIALIZER;
451 /* Print the operand address in x to the stream. */
454 print_operand_address (FILE *stream, rtx x)
456 switch (GET_CODE (x))
460 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
465 rtx base = XEXP (x, 0);
466 rtx index = XEXP (x, 1);
468 switch (GET_CODE (index))
471 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
472 reg_names[true_regnum (base)]);
478 int base_num = true_regnum (base);
479 int index_num = true_regnum (index);
481 fprintf (stream, "@(r0,%s)",
482 reg_names[MAX (base_num, index_num)]);
494 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
498 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
502 x = mark_constant_pool_use (x);
503 output_addr_const (stream, x);
508 /* Print operand x (an rtx) in assembler syntax to file stream
509 according to modifier code.
511 '.' print a .s if insn needs delay slot
512 ',' print LOCAL_LABEL_PREFIX
513 '@' print trap, rte or rts depending upon pragma interruptness
514 '#' output a nop if there is nothing to put in the delay slot
515 ''' print likelihood suffix (/u for unlikely).
516 'O' print a constant without the #
517 'R' print the LSW of a dp value - changes if in little endian
518 'S' print the MSW of a dp value - changes if in little endian
519 'T' print the next word of a dp value - same as 'R' in big endian mode.
520 'M' print an `x' if `m' will print `base,index'.
521 'N' print 'r63' if the operand is (const_int 0).
522 'm' print a pair `base,offset' or `base,index', for LD and ST.
523 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
524 'o' output an operator. */
527 print_operand (FILE *stream, rtx x, int code)
533 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
534 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
535 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
538 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
542 fprintf (stream, "trapa #%d", trap_exit);
543 else if (sh_cfun_interrupt_handler_p ())
544 fprintf (stream, "rte");
546 fprintf (stream, "rts");
549 /* Output a nop if there's nothing in the delay slot. */
550 if (dbr_sequence_length () == 0)
551 fprintf (stream, "\n\tnop");
555 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
557 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
558 fputs ("/u", stream);
562 x = mark_constant_pool_use (x);
563 output_addr_const (stream, x);
566 fputs (reg_names[REGNO (x) + LSW], (stream));
569 fputs (reg_names[REGNO (x) + MSW], (stream));
572 /* Next word of a double. */
573 switch (GET_CODE (x))
576 fputs (reg_names[REGNO (x) + 1], (stream));
579 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
580 && GET_CODE (XEXP (x, 0)) != POST_INC)
581 x = adjust_address (x, SImode, 4);
582 print_operand_address (stream, XEXP (x, 0));
589 switch (GET_CODE (x))
591 case PLUS: fputs ("add", stream); break;
592 case MINUS: fputs ("sub", stream); break;
593 case MULT: fputs ("mul", stream); break;
594 case DIV: fputs ("div", stream); break;
595 case EQ: fputs ("eq", stream); break;
596 case NE: fputs ("ne", stream); break;
597 case GT: case LT: fputs ("gt", stream); break;
598 case GE: case LE: fputs ("ge", stream); break;
599 case GTU: case LTU: fputs ("gtu", stream); break;
600 case GEU: case LEU: fputs ("geu", stream); break;
606 if (GET_CODE (x) == MEM
607 && GET_CODE (XEXP (x, 0)) == PLUS
608 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
609 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
614 if (GET_CODE (x) != MEM)
617 switch (GET_CODE (x))
621 print_operand (stream, x, 0);
622 fputs (", 0", stream);
626 print_operand (stream, XEXP (x, 0), 0);
627 fputs (", ", stream);
628 print_operand (stream, XEXP (x, 1), 0);
637 if (x == CONST0_RTX (GET_MODE (x)))
639 fprintf ((stream), "r63");
644 if (GET_CODE (x) == CONST_INT)
646 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
653 switch (GET_CODE (x))
655 /* FIXME: We need this on SHmedia32 because reload generates
656 some sign-extended HI or QI loads into DImode registers
657 but, because Pmode is SImode, the address ends up with a
658 subreg:SI of the DImode register. Maybe reload should be
659 fixed so as to apply alter_subreg to such loads? */
661 if (SUBREG_BYTE (x) != 0
662 || GET_CODE (SUBREG_REG (x)) != REG)
669 if (FP_REGISTER_P (REGNO (x))
670 && GET_MODE (x) == V16SFmode)
671 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
672 else if (FP_REGISTER_P (REGNO (x))
673 && GET_MODE (x) == V4SFmode)
674 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
675 else if (GET_CODE (x) == REG
676 && GET_MODE (x) == V2SFmode)
677 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
678 else if (FP_REGISTER_P (REGNO (x))
679 && GET_MODE_SIZE (GET_MODE (x)) > 4)
680 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
682 fputs (reg_names[REGNO (x)], (stream));
686 output_address (XEXP (x, 0));
691 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
692 && GET_MODE (XEXP (x, 0)) == DImode
693 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
694 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
696 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
699 if (GET_CODE (val) == ASHIFTRT)
702 if (GET_CODE (XEXP (val, 0)) == CONST)
704 output_addr_const (stream, XEXP (val, 0));
705 if (GET_CODE (XEXP (val, 0)) == CONST)
707 fputs (" >> ", stream);
708 output_addr_const (stream, XEXP (val, 1));
713 if (GET_CODE (val) == CONST)
715 output_addr_const (stream, val);
716 if (GET_CODE (val) == CONST)
719 fputs (" & 65535)", stream);
727 output_addr_const (stream, x);
734 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
736 force_into (rtx value, rtx target)
738 value = force_operand (value, target);
739 if (! rtx_equal_p (value, target))
740 emit_insn (gen_move_insn (target, value));
743 /* Emit code to perform a block move. Choose the best method.
745 OPERANDS[0] is the destination.
746 OPERANDS[1] is the source.
747 OPERANDS[2] is the size.
748 OPERANDS[3] is the alignment safe to use. */
751 expand_block_move (rtx *operands)
753 int align = INTVAL (operands[3]);
754 int constp = (GET_CODE (operands[2]) == CONST_INT);
755 int bytes = (constp ? INTVAL (operands[2]) : 0);
757 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
758 alignment, or if it isn't a multiple of 4 bytes, then fail. */
759 if (! constp || align < 4 || (bytes % 4 != 0))
766 else if (bytes == 12)
771 rtx r4 = gen_rtx_REG (SImode, 4);
772 rtx r5 = gen_rtx_REG (SImode, 5);
774 entry_name = get_identifier ("__movstrSI12_i4");
776 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
777 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
778 force_into (XEXP (operands[0], 0), r4);
779 force_into (XEXP (operands[1], 0), r5);
780 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
783 else if (! TARGET_SMALLCODE)
789 rtx r4 = gen_rtx_REG (SImode, 4);
790 rtx r5 = gen_rtx_REG (SImode, 5);
791 rtx r6 = gen_rtx_REG (SImode, 6);
793 entry_name = get_identifier (bytes & 4
795 : "__movstr_i4_even");
796 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
797 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
798 force_into (XEXP (operands[0], 0), r4);
799 force_into (XEXP (operands[1], 0), r5);
802 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
803 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
815 rtx r4 = gen_rtx_REG (SImode, 4);
816 rtx r5 = gen_rtx_REG (SImode, 5);
818 sprintf (entry, "__movstrSI%d", bytes);
819 entry_name = get_identifier (entry);
820 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
821 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
822 force_into (XEXP (operands[0], 0), r4);
823 force_into (XEXP (operands[1], 0), r5);
824 emit_insn (gen_block_move_real (func_addr_rtx));
828 /* This is the same number of bytes as a memcpy call, but to a different
829 less common function name, so this will occasionally use more space. */
830 if (! TARGET_SMALLCODE)
835 int final_switch, while_loop;
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
838 rtx r6 = gen_rtx_REG (SImode, 6);
840 entry_name = get_identifier ("__movstr");
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
846 /* r6 controls the size of the move. 16 is decremented from it
847 for each 64 bytes moved. Then the negative bit left over is used
848 as an index into a list of move instructions. e.g., a 72 byte move
849 would be set up with size(r6) = 14, for one iteration through the
850 big while loop, and a switch of -2 for the last part. */
852 final_switch = 16 - ((bytes / 4) % 16);
853 while_loop = ((bytes / 4) / 16 - 1) * 16;
854 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
855 emit_insn (gen_block_lump_real (func_addr_rtx));
862 /* Prepare operands for a move define_expand; specifically, one of the
863 operands must be in a register. */
866 prepare_move_operands (rtx operands[], enum machine_mode mode)
868 if ((mode == SImode || mode == DImode)
870 && ! ((mode == Pmode || mode == ptr_mode)
871 && tls_symbolic_operand (operands[1], Pmode) != 0))
874 if (SYMBOLIC_CONST_P (operands[1]))
876 if (GET_CODE (operands[0]) == MEM)
877 operands[1] = force_reg (Pmode, operands[1]);
878 else if (TARGET_SHMEDIA
879 && GET_CODE (operands[1]) == LABEL_REF
880 && target_reg_operand (operands[0], mode))
884 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
885 operands[1] = legitimize_pic_address (operands[1], mode, temp);
888 else if (GET_CODE (operands[1]) == CONST
889 && GET_CODE (XEXP (operands[1], 0)) == PLUS
890 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
892 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
893 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
895 operands[1] = expand_binop (mode, add_optab, temp,
896 XEXP (XEXP (operands[1], 0), 1),
897 no_new_pseudos ? temp
898 : gen_reg_rtx (Pmode),
903 if (! reload_in_progress && ! reload_completed)
905 /* Copy the source to a register if both operands aren't registers. */
906 if (! register_operand (operands[0], mode)
907 && ! sh_register_operand (operands[1], mode))
908 operands[1] = copy_to_mode_reg (mode, operands[1]);
910 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
912 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
913 except that we can't use that function because it is static. */
914 rtx new = change_address (operands[0], mode, 0);
915 MEM_COPY_ATTRIBUTES (new, operands[0]);
919 /* This case can happen while generating code to move the result
920 of a library call to the target. Reject `st r0,@(rX,rY)' because
921 reload will fail to find a spill register for rX, since r0 is already
922 being used for the source. */
923 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
924 && GET_CODE (operands[0]) == MEM
925 && GET_CODE (XEXP (operands[0], 0)) == PLUS
926 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
927 operands[1] = copy_to_mode_reg (mode, operands[1]);
930 if (mode == Pmode || mode == ptr_mode)
933 enum tls_model tls_kind;
937 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
939 rtx tga_op1, tga_ret, tmp, tmp2;
944 case TLS_MODEL_GLOBAL_DYNAMIC:
945 tga_ret = gen_rtx_REG (Pmode, R0_REG);
946 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
950 case TLS_MODEL_LOCAL_DYNAMIC:
951 tga_ret = gen_rtx_REG (Pmode, R0_REG);
952 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
954 tmp = gen_reg_rtx (Pmode);
955 emit_move_insn (tmp, tga_ret);
957 if (register_operand (op0, Pmode))
960 tmp2 = gen_reg_rtx (Pmode);
962 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
966 case TLS_MODEL_INITIAL_EXEC:
968 emit_insn (gen_GOTaddr2picreg ());
969 tga_op1 = gen_reg_rtx (Pmode);
970 tmp = gen_sym2GOTTPOFF (op1);
971 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
975 case TLS_MODEL_LOCAL_EXEC:
976 tmp2 = gen_reg_rtx (Pmode);
977 emit_insn (gen_load_gbr (tmp2));
978 tmp = gen_reg_rtx (Pmode);
979 emit_insn (gen_symTPOFF2reg (tmp, op1));
980 RTX_UNCHANGING_P (tmp) = 1;
982 if (register_operand (op0, Pmode))
985 op1 = gen_reg_rtx (Pmode);
987 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1000 /* Prepare the operands for an scc instruction; make sure that the
1001 compare has been done. */
1003 prepare_scc_operands (enum rtx_code code)
1005 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1006 enum rtx_code oldcode = code;
1007 enum machine_mode mode;
1009 /* First need a compare insn. */
1013 /* It isn't possible to handle this case. */
1030 if (code != oldcode)
1032 rtx tmp = sh_compare_op0;
1033 sh_compare_op0 = sh_compare_op1;
1034 sh_compare_op1 = tmp;
1037 mode = GET_MODE (sh_compare_op0);
1038 if (mode == VOIDmode)
1039 mode = GET_MODE (sh_compare_op1);
1041 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1042 if ((code != EQ && code != NE
1043 && (sh_compare_op1 != const0_rtx
1044 || code == GTU || code == GEU || code == LTU || code == LEU))
1045 || (mode == DImode && sh_compare_op1 != const0_rtx)
1046 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1047 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1049 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1050 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1051 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1052 gen_rtx_SET (VOIDmode, t_reg,
1053 gen_rtx_fmt_ee (code, SImode,
1054 sh_compare_op0, sh_compare_op1)),
1055 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1057 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1058 gen_rtx_fmt_ee (code, SImode,
1059 sh_compare_op0, sh_compare_op1)));
1064 /* Called from the md file, set up the operands of a compare instruction. */
1067 from_compare (rtx *operands, int code)
1069 enum machine_mode mode = GET_MODE (sh_compare_op0);
1071 if (mode == VOIDmode)
1072 mode = GET_MODE (sh_compare_op1);
1075 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1077 /* Force args into regs, since we can't use constants here. */
1078 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1079 if (sh_compare_op1 != const0_rtx
1080 || code == GTU || code == GEU
1081 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1082 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1084 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1086 from_compare (operands, GT);
1087 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1090 insn = gen_rtx_SET (VOIDmode,
1091 gen_rtx_REG (SImode, T_REG),
1092 gen_rtx_fmt_ee (code, SImode,
1093 sh_compare_op0, sh_compare_op1));
1094 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1096 insn = gen_rtx_PARALLEL (VOIDmode,
1098 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1099 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1105 /* Functions to output assembly code. */
1107 /* Return a sequence of instructions to perform DI or DF move.
1109 Since the SH cannot move a DI or DF in one instruction, we have
1110 to take care when we see overlapping source and dest registers. */
1113 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1114 enum machine_mode mode)
1116 rtx dst = operands[0];
1117 rtx src = operands[1];
1119 if (GET_CODE (dst) == MEM
1120 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1121 return "mov.l %T1,%0\n\tmov.l %1,%0";
1123 if (register_operand (dst, mode)
1124 && register_operand (src, mode))
1126 if (REGNO (src) == MACH_REG)
1127 return "sts mach,%S0\n\tsts macl,%R0";
1129 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1130 when mov.d r1,r0 do r1->r0 then r2->r1. */
1132 if (REGNO (src) + 1 == REGNO (dst))
1133 return "mov %T1,%T0\n\tmov %1,%0";
1135 return "mov %1,%0\n\tmov %T1,%T0";
1137 else if (GET_CODE (src) == CONST_INT)
1139 if (INTVAL (src) < 0)
1140 output_asm_insn ("mov #-1,%S0", operands);
1142 output_asm_insn ("mov #0,%S0", operands);
1144 return "mov %1,%R0";
1146 else if (GET_CODE (src) == MEM)
1149 int dreg = REGNO (dst);
1150 rtx inside = XEXP (src, 0);
1152 if (GET_CODE (inside) == REG)
1153 ptrreg = REGNO (inside);
1154 else if (GET_CODE (inside) == SUBREG)
1155 ptrreg = subreg_regno (inside);
1156 else if (GET_CODE (inside) == PLUS)
1158 ptrreg = REGNO (XEXP (inside, 0));
1159 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1160 an offsettable address. Unfortunately, offsettable addresses use
1161 QImode to check the offset, and a QImode offsettable address
1162 requires r0 for the other operand, which is not currently
1163 supported, so we can't use the 'o' constraint.
1164 Thus we must check for and handle r0+REG addresses here.
1165 We punt for now, since this is likely very rare. */
1166 if (GET_CODE (XEXP (inside, 1)) == REG)
1169 else if (GET_CODE (inside) == LABEL_REF)
1170 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1171 else if (GET_CODE (inside) == POST_INC)
1172 return "mov.l %1,%0\n\tmov.l %1,%T0";
1176 /* Work out the safe way to copy. Copy into the second half first. */
1178 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1181 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1184 /* Print an instruction which would have gone into a delay slot after
1185 another instruction, but couldn't because the other instruction expanded
1186 into a sequence where putting the slot insn at the end wouldn't work. */
1189 print_slot (rtx insn)
1191 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1193 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1197 output_far_jump (rtx insn, rtx op)
1199 struct { rtx lab, reg, op; } this;
1200 rtx braf_base_lab = NULL_RTX;
1203 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1206 this.lab = gen_label_rtx ();
1210 && offset - get_attr_length (insn) <= 32766)
1213 jump = "mov.w %O0,%1; braf %1";
1221 jump = "mov.l %O0,%1; braf %1";
1223 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1226 jump = "mov.l %O0,%1; jmp @%1";
1228 /* If we have a scratch register available, use it. */
1229 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1230 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1232 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1233 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1234 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1235 output_asm_insn (jump, &this.lab);
1236 if (dbr_sequence_length ())
1237 print_slot (final_sequence);
1239 output_asm_insn ("nop", 0);
1243 /* Output the delay slot insn first if any. */
1244 if (dbr_sequence_length ())
1245 print_slot (final_sequence);
1247 this.reg = gen_rtx_REG (SImode, 13);
1248 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1249 Fortunately, MACL is fixed and call-clobbered, and we never
1250 need its value across jumps, so save r13 in it instead of in
1253 output_asm_insn ("lds r13, macl", 0);
1255 output_asm_insn ("mov.l r13,@-r15", 0);
1256 output_asm_insn (jump, &this.lab);
1258 output_asm_insn ("sts macl, r13", 0);
1260 output_asm_insn ("mov.l @r15+,r13", 0);
1262 if (far && flag_pic && TARGET_SH2)
1264 braf_base_lab = gen_label_rtx ();
1265 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1266 CODE_LABEL_NUMBER (braf_base_lab));
1269 output_asm_insn (".align 2", 0);
1270 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1272 if (far && flag_pic)
1275 this.lab = braf_base_lab;
1276 output_asm_insn (".long %O2-%O0", &this.lab);
1279 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1283 /* Local label counter, used for constants in the pool and inside
1284 pattern branches. */
1286 static int lf = 100;
1288 /* Output code for ordinary branches. */
1291 output_branch (int logic, rtx insn, rtx *operands)
1293 switch (get_attr_length (insn))
1296 /* This can happen if filling the delay slot has caused a forward
1297 branch to exceed its range (we could reverse it, but only
1298 when we know we won't overextend other branches; this should
1299 best be handled by relaxation).
1300 It can also happen when other condbranches hoist delay slot insn
1301 from their destination, thus leading to code size increase.
1302 But the branch will still be in the range -4092..+4098 bytes. */
1307 /* The call to print_slot will clobber the operands. */
1308 rtx op0 = operands[0];
1310 /* If the instruction in the delay slot is annulled (true), then
1311 there is no delay slot where we can put it now. The only safe
1312 place for it is after the label. final will do that by default. */
1315 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1317 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1318 ASSEMBLER_DIALECT ? "/" : ".", label);
1319 print_slot (final_sequence);
1322 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1324 output_asm_insn ("bra\t%l0", &op0);
1325 fprintf (asm_out_file, "\tnop\n");
1326 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1330 /* When relaxing, handle this like a short branch. The linker
1331 will fix it up if it still doesn't fit after relaxation. */
1333 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1335 /* These are for SH2e, in which we have to account for the
1336 extra nop because of the hardware bug in annulled branches. */
1343 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1345 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1347 ASSEMBLER_DIALECT ? "/" : ".", label);
1348 fprintf (asm_out_file, "\tnop\n");
1349 output_asm_insn ("bra\t%l0", operands);
1350 fprintf (asm_out_file, "\tnop\n");
1351 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1355 /* When relaxing, fall through. */
1360 sprintf (buffer, "b%s%ss\t%%l0",
1362 ASSEMBLER_DIALECT ? "/" : ".");
1363 output_asm_insn (buffer, &operands[0]);
1368 /* There should be no longer branches now - that would
1369 indicate that something has destroyed the branches set
1370 up in machine_dependent_reorg. */
1376 output_branchy_insn (enum rtx_code code, const char *template,
1377 rtx insn, rtx *operands)
1379 rtx next_insn = NEXT_INSN (insn);
1381 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1383 rtx src = SET_SRC (PATTERN (next_insn));
1384 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1386 /* Following branch not taken */
1387 operands[9] = gen_label_rtx ();
1388 emit_label_after (operands[9], next_insn);
1389 INSN_ADDRESSES_NEW (operands[9],
1390 INSN_ADDRESSES (INSN_UID (next_insn))
1391 + get_attr_length (next_insn));
1396 int offset = (branch_dest (next_insn)
1397 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1398 if (offset >= -252 && offset <= 258)
1400 if (GET_CODE (src) == IF_THEN_ELSE)
1402 src = XEXP (src, 1);
1408 operands[9] = gen_label_rtx ();
1409 emit_label_after (operands[9], insn);
1410 INSN_ADDRESSES_NEW (operands[9],
1411 INSN_ADDRESSES (INSN_UID (insn))
1412 + get_attr_length (insn));
1417 output_ieee_ccmpeq (rtx insn, rtx *operands)
1419 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1422 /* Output the start of the assembler file. */
1425 sh_file_start (void)
1427 default_file_start ();
1430 /* We need to show the text section with the proper
1431 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1432 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1433 will complain. We can teach GAS specifically about the
1434 default attributes for our choice of text section, but
1435 then we would have to change GAS again if/when we change
1436 the text section name. */
1437 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1439 /* Switch to the data section so that the coffsem symbol
1440 isn't in the text section. */
1443 if (TARGET_LITTLE_ENDIAN)
1444 fputs ("\t.little\n", asm_out_file);
1448 if (TARGET_SHCOMPACT)
1449 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1450 else if (TARGET_SHMEDIA)
1451 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1452 TARGET_SHMEDIA64 ? 64 : 32);
1456 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1459 unspec_caller_rtx_p (rtx pat)
1461 switch (GET_CODE (pat))
1464 return unspec_caller_rtx_p (XEXP (pat, 0));
1467 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1469 return unspec_caller_rtx_p (XEXP (pat, 1));
1471 if (XINT (pat, 1) == UNSPEC_CALLER)
1480 /* Indicate that INSN cannot be duplicated. This is true for insn
1481 that generates an unique label. */
1484 sh_cannot_copy_insn_p (rtx insn)
1488 if (!reload_completed || !flag_pic)
1491 if (GET_CODE (insn) != INSN)
1493 if (asm_noperands (insn) >= 0)
1496 pat = PATTERN (insn);
1497 if (GET_CODE (pat) != SET)
1499 pat = SET_SRC (pat);
1501 if (unspec_caller_rtx_p (pat))
1507 /* Actual number of instructions used to make a shift by N. */
1508 static const char ashiftrt_insns[] =
1509 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1511 /* Left shift and logical right shift are the same. */
1512 static const char shift_insns[] =
1513 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1515 /* Individual shift amounts needed to get the above length sequences.
1516 One bit right shifts clobber the T bit, so when possible, put one bit
1517 shifts in the middle of the sequence, so the ends are eligible for
1518 branch delay slots. */
1519 static const short shift_amounts[32][5] = {
1520 {0}, {1}, {2}, {2, 1},
1521 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1522 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1523 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1524 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1525 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1526 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1527 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1529 /* Likewise, but for shift amounts < 16, up to three highmost bits
1530 might be clobbered. This is typically used when combined with some
1531 kind of sign or zero extension. */
1533 static const char ext_shift_insns[] =
1534 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1536 static const short ext_shift_amounts[32][4] = {
1537 {0}, {1}, {2}, {2, 1},
1538 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1539 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1540 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1541 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1542 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1543 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1544 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1546 /* Assuming we have a value that has been sign-extended by at least one bit,
1547 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1548 to shift it by N without data loss, and quicker than by other means? */
1549 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1551 /* This is used in length attributes in sh.md to help compute the length
1552 of arbitrary constant shift instructions. */
1555 shift_insns_rtx (rtx insn)
1557 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1558 int shift_count = INTVAL (XEXP (set_src, 1));
1559 enum rtx_code shift_code = GET_CODE (set_src);
1564 return ashiftrt_insns[shift_count];
1567 return shift_insns[shift_count];
1573 /* Return the cost of a shift. */
1583 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1585 if (GET_MODE (x) == DImode
1586 && GET_CODE (XEXP (x, 1)) == CONST_INT
1587 && INTVAL (XEXP (x, 1)) == 1)
1590 /* Everything else is invalid, because there is no pattern for it. */
1593 /* If shift by a non constant, then this will be expensive. */
1594 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1595 return SH_DYNAMIC_SHIFT_COST;
1597 value = INTVAL (XEXP (x, 1));
1599 /* Otherwise, return the true cost in instructions. */
1600 if (GET_CODE (x) == ASHIFTRT)
1602 int cost = ashiftrt_insns[value];
1603 /* If SH3, then we put the constant in a reg and use shad. */
1604 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1605 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1609 return shift_insns[value];
1612 /* Return the cost of an AND operation. */
1619 /* Anding with a register is a single cycle and instruction. */
1620 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1623 i = INTVAL (XEXP (x, 1));
1627 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1628 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1629 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1635 /* These constants are single cycle extu.[bw] instructions. */
1636 if (i == 0xff || i == 0xffff)
1638 /* Constants that can be used in an and immediate instruction in a single
1639 cycle, but this requires r0, so make it a little more expensive. */
1640 if (CONST_OK_FOR_K08 (i))
1642 /* Constants that can be loaded with a mov immediate and an and.
1643 This case is probably unnecessary. */
1644 if (CONST_OK_FOR_I08 (i))
1646 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1647 This case is probably unnecessary. */
1651 /* Return the cost of an addition or a subtraction. */
1656 /* Adding a register is a single cycle insn. */
1657 if (GET_CODE (XEXP (x, 1)) == REG
1658 || GET_CODE (XEXP (x, 1)) == SUBREG)
1661 /* Likewise for small constants. */
1662 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1663 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1667 switch (GET_CODE (XEXP (x, 1)))
1672 return TARGET_SHMEDIA64 ? 5 : 3;
1675 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1677 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1679 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1687 /* Any other constant requires a 2 cycle pc-relative load plus an
1692 /* Return the cost of a multiply. */
1694 multcosts (rtx x ATTRIBUTE_UNUSED)
1701 /* We have a mul insn, so we can never take more than the mul and the
1702 read of the mac reg, but count more because of the latency and extra
1704 if (TARGET_SMALLCODE)
1709 /* If we're aiming at small code, then just count the number of
1710 insns in a multiply call sequence. */
1711 if (TARGET_SMALLCODE)
1714 /* Otherwise count all the insns in the routine we'd be calling too. */
1718 /* Compute a (partial) cost for rtx X. Return true if the complete
1719 cost has been computed, and false if subexpressions should be
1720 scanned. In either case, *TOTAL contains the cost result. */
1723 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1730 if (INTVAL (x) == 0)
1732 else if (outer_code == AND && and_operand ((x), DImode))
1734 else if ((outer_code == IOR || outer_code == XOR
1735 || outer_code == PLUS)
1736 && CONST_OK_FOR_I10 (INTVAL (x)))
1738 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1739 *total = COSTS_N_INSNS (outer_code != SET);
1740 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1741 *total = COSTS_N_INSNS (2);
1742 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1743 *total = COSTS_N_INSNS (3);
1745 *total = COSTS_N_INSNS (4);
1748 if (CONST_OK_FOR_I08 (INTVAL (x)))
1750 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1751 && CONST_OK_FOR_K08 (INTVAL (x)))
1760 if (TARGET_SHMEDIA64)
1761 *total = COSTS_N_INSNS (4);
1762 else if (TARGET_SHMEDIA32)
1763 *total = COSTS_N_INSNS (2);
1770 *total = COSTS_N_INSNS (4);
1776 *total = COSTS_N_INSNS (addsubcosts (x));
1780 *total = COSTS_N_INSNS (andcosts (x));
1784 *total = COSTS_N_INSNS (multcosts (x));
1790 *total = COSTS_N_INSNS (shiftcosts (x));
1797 *total = COSTS_N_INSNS (20);
1810 /* Compute the cost of an address. For the SH, all valid addresses are
1811 the same cost. Use a slightly higher cost for reg + reg addressing,
1812 since it increases pressure on r0. */
1815 sh_address_cost (rtx X)
1817 return (GET_CODE (X) == PLUS
1818 && ! CONSTANT_P (XEXP (X, 1))
1819 && ! TARGET_SHMEDIA ? 1 : 0);
1822 /* Code to expand a shift. */
1825 gen_ashift (int type, int n, rtx reg)
1827 /* Negative values here come from the shift_amounts array. */
1840 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1844 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1846 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1849 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1854 /* Same for HImode */
1857 gen_ashift_hi (int type, int n, rtx reg)
1859 /* Negative values here come from the shift_amounts array. */
1873 /* We don't have HImode right shift operations because using the
1874 ordinary 32 bit shift instructions for that doesn't generate proper
1875 zero/sign extension.
1876 gen_ashift_hi is only called in contexts where we know that the
1877 sign extension works out correctly. */
1880 if (GET_CODE (reg) == SUBREG)
1882 offset = SUBREG_BYTE (reg);
1883 reg = SUBREG_REG (reg);
1885 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1889 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1894 /* Output RTL to split a constant shift into its component SH constant
1895 shift instructions. */
1898 gen_shifty_op (int code, rtx *operands)
1900 int value = INTVAL (operands[2]);
1903 /* Truncate the shift count in case it is out of bounds. */
1904 value = value & 0x1f;
1908 if (code == LSHIFTRT)
1910 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1911 emit_insn (gen_movt (operands[0]));
1914 else if (code == ASHIFT)
1916 /* There is a two instruction sequence for 31 bit left shifts,
1917 but it requires r0. */
1918 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1920 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1921 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1926 else if (value == 0)
1928 /* This can happen when not optimizing. We must output something here
1929 to prevent the compiler from aborting in final.c after the try_split
1931 emit_insn (gen_nop ());
1935 max = shift_insns[value];
1936 for (i = 0; i < max; i++)
1937 gen_ashift (code, shift_amounts[value][i], operands[0]);
1940 /* Same as above, but optimized for values where the topmost bits don't
1944 gen_shifty_hi_op (int code, rtx *operands)
1946 int value = INTVAL (operands[2]);
1948 void (*gen_fun) (int, int, rtx);
1950 /* This operation is used by and_shl for SImode values with a few
1951 high bits known to be cleared. */
1955 emit_insn (gen_nop ());
1959 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1962 max = ext_shift_insns[value];
1963 for (i = 0; i < max; i++)
1964 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1967 /* When shifting right, emit the shifts in reverse order, so that
1968 solitary negative values come first. */
1969 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1970 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1973 /* Output RTL for an arithmetic right shift. */
1975 /* ??? Rewrite to use super-optimizer sequences. */
1978 expand_ashiftrt (rtx *operands)
1988 if (GET_CODE (operands[2]) != CONST_INT)
1990 rtx count = copy_to_mode_reg (SImode, operands[2]);
1991 emit_insn (gen_negsi2 (count, count));
1992 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1995 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1996 > 1 + SH_DYNAMIC_SHIFT_COST)
1999 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2000 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2004 if (GET_CODE (operands[2]) != CONST_INT)
2007 value = INTVAL (operands[2]) & 31;
2011 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2014 else if (value >= 16 && value <= 19)
2016 wrk = gen_reg_rtx (SImode);
2017 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2020 gen_ashift (ASHIFTRT, 1, wrk);
2021 emit_move_insn (operands[0], wrk);
2024 /* Expand a short sequence inline, longer call a magic routine. */
2025 else if (value <= 5)
2027 wrk = gen_reg_rtx (SImode);
2028 emit_move_insn (wrk, operands[1]);
2030 gen_ashift (ASHIFTRT, 1, wrk);
2031 emit_move_insn (operands[0], wrk);
2035 wrk = gen_reg_rtx (Pmode);
2037 /* Load the value into an arg reg and call a helper. */
2038 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2039 sprintf (func, "__ashiftrt_r4_%d", value);
2040 func_name = get_identifier (func);
2041 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2042 emit_move_insn (wrk, sym);
2043 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2044 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2049 sh_dynamicalize_shift_p (rtx count)
2051 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2054 /* Try to find a good way to implement the combiner pattern
2055 [(set (match_operand:SI 0 "register_operand" "r")
2056 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2057 (match_operand:SI 2 "const_int_operand" "n"))
2058 (match_operand:SI 3 "const_int_operand" "n"))) .
2059 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2060 return 0 for simple right / left or left/right shift combination.
2061 return 1 for a combination of shifts with zero_extend.
2062 return 2 for a combination of shifts with an AND that needs r0.
2063 return 3 for a combination of shifts with an AND that needs an extra
2064 scratch register, when the three highmost bits of the AND mask are clear.
2065 return 4 for a combination of shifts with an AND that needs an extra
2066 scratch register, when any of the three highmost bits of the AND mask
2068 If ATTRP is set, store an initial right shift width in ATTRP[0],
2069 and the instruction length in ATTRP[1] . These values are not valid
2071 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2072 shift_amounts for the last shift value that is to be used before the
2075 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2077 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2078 int left = INTVAL (left_rtx), right;
2080 int cost, best_cost = 10000;
2081 int best_right = 0, best_len = 0;
2085 if (left < 0 || left > 31)
2087 if (GET_CODE (mask_rtx) == CONST_INT)
2088 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2090 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2091 /* Can this be expressed as a right shift / left shift pair? */
2092 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2093 right = exact_log2 (lsb);
2094 mask2 = ~(mask + lsb - 1);
2095 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2096 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2098 best_cost = shift_insns[right] + shift_insns[right + left];
2099 /* mask has no trailing zeroes <==> ! right */
2100 else if (! right && mask2 == ~(lsb2 - 1))
2102 int late_right = exact_log2 (lsb2);
2103 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2105 /* Try to use zero extend. */
2106 if (mask2 == ~(lsb2 - 1))
2110 for (width = 8; width <= 16; width += 8)
2112 /* Can we zero-extend right away? */
2113 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2116 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2117 if (cost < best_cost)
2128 /* ??? Could try to put zero extend into initial right shift,
2129 or even shift a bit left before the right shift. */
2130 /* Determine value of first part of left shift, to get to the
2131 zero extend cut-off point. */
2132 first = width - exact_log2 (lsb2) + right;
2133 if (first >= 0 && right + left - first >= 0)
2135 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2136 + ext_shift_insns[right + left - first];
2137 if (cost < best_cost)
2149 /* Try to use r0 AND pattern */
2150 for (i = 0; i <= 2; i++)
2154 if (! CONST_OK_FOR_K08 (mask >> i))
2156 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2157 if (cost < best_cost)
2162 best_len = cost - 1;
2165 /* Try to use a scratch register to hold the AND operand. */
2166 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2167 for (i = 0; i <= 2; i++)
2171 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2172 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2173 if (cost < best_cost)
2178 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2184 attrp[0] = best_right;
2185 attrp[1] = best_len;
2190 /* This is used in length attributes of the unnamed instructions
2191 corresponding to shl_and_kind return values of 1 and 2. */
2193 shl_and_length (rtx insn)
2195 rtx set_src, left_rtx, mask_rtx;
2198 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2199 left_rtx = XEXP (XEXP (set_src, 0), 1);
2200 mask_rtx = XEXP (set_src, 1);
2201 shl_and_kind (left_rtx, mask_rtx, attributes);
2202 return attributes[1];
2205 /* This is used in length attribute of the and_shl_scratch instruction. */
2208 shl_and_scr_length (rtx insn)
2210 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2211 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2212 rtx op = XEXP (set_src, 0);
2213 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2214 op = XEXP (XEXP (op, 0), 0);
2215 return len + shift_insns[INTVAL (XEXP (op, 1))];
2218 /* Generating rtl? */
2219 extern int rtx_equal_function_value_matters;
2221 /* Generate rtl for instructions for which shl_and_kind advised a particular
2222 method of generating them, i.e. returned zero. */
2225 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2228 unsigned HOST_WIDE_INT mask;
2229 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2230 int right, total_shift;
2231 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2233 right = attributes[0];
2234 total_shift = INTVAL (left_rtx) + right;
2235 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2242 int first = attributes[2];
2247 emit_insn ((mask << right) <= 0xff
2248 ? gen_zero_extendqisi2 (dest,
2249 gen_lowpart (QImode, source))
2250 : gen_zero_extendhisi2 (dest,
2251 gen_lowpart (HImode, source)));
2255 emit_insn (gen_movsi (dest, source));
2259 operands[2] = GEN_INT (right);
2260 gen_shifty_hi_op (LSHIFTRT, operands);
2264 operands[2] = GEN_INT (first);
2265 gen_shifty_hi_op (ASHIFT, operands);
2266 total_shift -= first;
2270 emit_insn (mask <= 0xff
2271 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2272 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2273 if (total_shift > 0)
2275 operands[2] = GEN_INT (total_shift);
2276 gen_shifty_hi_op (ASHIFT, operands);
2281 shift_gen_fun = gen_shifty_op;
2283 /* If the topmost bit that matters is set, set the topmost bits
2284 that don't matter. This way, we might be able to get a shorter
2286 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2287 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2289 /* Don't expand fine-grained when combining, because that will
2290 make the pattern fail. */
2291 if (rtx_equal_function_value_matters
2292 || reload_in_progress || reload_completed)
2296 /* Cases 3 and 4 should be handled by this split
2297 only while combining */
2302 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2305 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2310 operands[2] = GEN_INT (total_shift);
2311 shift_gen_fun (ASHIFT, operands);
2318 if (kind != 4 && total_shift < 16)
2320 neg = -ext_shift_amounts[total_shift][1];
2322 neg -= ext_shift_amounts[total_shift][2];
2326 emit_insn (gen_and_shl_scratch (dest, source,
2329 GEN_INT (total_shift + neg),
2331 emit_insn (gen_movsi (dest, dest));
2338 /* Try to find a good way to implement the combiner pattern
2339 [(set (match_operand:SI 0 "register_operand" "=r")
2340 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2341 (match_operand:SI 2 "const_int_operand" "n")
2342 (match_operand:SI 3 "const_int_operand" "n")
2344 (clobber (reg:SI T_REG))]
2345 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2346 return 0 for simple left / right shift combination.
2347 return 1 for left shift / 8 bit sign extend / left shift.
2348 return 2 for left shift / 16 bit sign extend / left shift.
2349 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2350 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2351 return 5 for left shift / 16 bit sign extend / right shift
2352 return 6 for < 8 bit sign extend / left shift.
2353 return 7 for < 8 bit sign extend / left shift / single right shift.
2354 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2357 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2359 int left, size, insize, ext;
2360 int cost = 0, best_cost;
2363 left = INTVAL (left_rtx);
2364 size = INTVAL (size_rtx);
2365 insize = size - left;
2368 /* Default to left / right shift. */
2370 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2373 /* 16 bit shift / sign extend / 16 bit shift */
2374 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2375 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2376 below, by alternative 3 or something even better. */
2377 if (cost < best_cost)
2383 /* Try a plain sign extend between two shifts. */
2384 for (ext = 16; ext >= insize; ext -= 8)
2388 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2389 if (cost < best_cost)
2391 kind = ext / (unsigned) 8;
2395 /* Check if we can do a sloppy shift with a final signed shift
2396 restoring the sign. */
2397 if (EXT_SHIFT_SIGNED (size - ext))
2398 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2399 /* If not, maybe it's still cheaper to do the second shift sloppy,
2400 and do a final sign extend? */
2401 else if (size <= 16)
2402 cost = ext_shift_insns[ext - insize] + 1
2403 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2406 if (cost < best_cost)
2408 kind = ext / (unsigned) 8 + 2;
2412 /* Check if we can sign extend in r0 */
2415 cost = 3 + shift_insns[left];
2416 if (cost < best_cost)
2421 /* Try the same with a final signed shift. */
2424 cost = 3 + ext_shift_insns[left + 1] + 1;
2425 if (cost < best_cost)
2434 /* Try to use a dynamic shift. */
2435 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2436 if (cost < best_cost)
2447 /* Function to be used in the length attribute of the instructions
2448 implementing this pattern. */
2451 shl_sext_length (rtx insn)
2453 rtx set_src, left_rtx, size_rtx;
2456 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2457 left_rtx = XEXP (XEXP (set_src, 0), 1);
2458 size_rtx = XEXP (set_src, 1);
2459 shl_sext_kind (left_rtx, size_rtx, &cost);
2463 /* Generate rtl for this pattern */
2466 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2469 int left, size, insize, cost;
2472 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2473 left = INTVAL (left_rtx);
2474 size = INTVAL (size_rtx);
2475 insize = size - left;
2483 int ext = kind & 1 ? 8 : 16;
2484 int shift2 = size - ext;
2486 /* Don't expand fine-grained when combining, because that will
2487 make the pattern fail. */
2488 if (! rtx_equal_function_value_matters
2489 && ! reload_in_progress && ! reload_completed)
2491 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2492 emit_insn (gen_movsi (dest, source));
2496 emit_insn (gen_movsi (dest, source));
2500 operands[2] = GEN_INT (ext - insize);
2501 gen_shifty_hi_op (ASHIFT, operands);
2504 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2505 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2510 operands[2] = GEN_INT (shift2);
2511 gen_shifty_op (ASHIFT, operands);
2518 if (EXT_SHIFT_SIGNED (shift2))
2520 operands[2] = GEN_INT (shift2 + 1);
2521 gen_shifty_op (ASHIFT, operands);
2522 operands[2] = const1_rtx;
2523 gen_shifty_op (ASHIFTRT, operands);
2526 operands[2] = GEN_INT (shift2);
2527 gen_shifty_hi_op (ASHIFT, operands);
2531 operands[2] = GEN_INT (-shift2);
2532 gen_shifty_hi_op (LSHIFTRT, operands);
2534 emit_insn (size <= 8
2535 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2536 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2543 if (! rtx_equal_function_value_matters
2544 && ! reload_in_progress && ! reload_completed)
2545 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2549 operands[2] = GEN_INT (16 - insize);
2550 gen_shifty_hi_op (ASHIFT, operands);
2551 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2553 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2555 gen_ashift (ASHIFTRT, 1, dest);
2560 /* Don't expand fine-grained when combining, because that will
2561 make the pattern fail. */
2562 if (! rtx_equal_function_value_matters
2563 && ! reload_in_progress && ! reload_completed)
2565 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2566 emit_insn (gen_movsi (dest, source));
2569 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2570 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2571 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2573 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2574 gen_shifty_op (ASHIFT, operands);
2576 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2584 /* Prefix a symbol_ref name with "datalabel". */
2587 gen_datalabel_ref (rtx sym)
2589 if (GET_CODE (sym) == LABEL_REF)
2590 return gen_rtx_CONST (GET_MODE (sym),
2591 gen_rtx_UNSPEC (GET_MODE (sym),
2595 if (GET_CODE (sym) != SYMBOL_REF)
2602 /* The SH cannot load a large constant into a register, constants have to
2603 come from a pc relative load. The reference of a pc relative load
2604 instruction must be less than 1k infront of the instruction. This
2605 means that we often have to dump a constant inside a function, and
2606 generate code to branch around it.
2608 It is important to minimize this, since the branches will slow things
2609 down and make things bigger.
2611 Worst case code looks like:
2629 We fix this by performing a scan before scheduling, which notices which
2630 instructions need to have their operands fetched from the constant table
2631 and builds the table.
2635 scan, find an instruction which needs a pcrel move. Look forward, find the
2636 last barrier which is within MAX_COUNT bytes of the requirement.
2637 If there isn't one, make one. Process all the instructions between
2638 the find and the barrier.
2640 In the above example, we can tell that L3 is within 1k of L1, so
2641 the first move can be shrunk from the 3 insn+constant sequence into
2642 just 1 insn, and the constant moved to L3 to make:
2653 Then the second move becomes the target for the shortening process. */
2657 rtx value; /* Value in table. */
2658 rtx label; /* Label of value. */
2659 rtx wend; /* End of window. */
2660 enum machine_mode mode; /* Mode of value. */
2662 /* True if this constant is accessed as part of a post-increment
2663 sequence. Note that HImode constants are never accessed in this way. */
2664 bool part_of_sequence_p;
2667 /* The maximum number of constants that can fit into one pool, since
2668 the pc relative range is 0...1020 bytes and constants are at least 4
2671 #define MAX_POOL_SIZE (1020/4)
2672 static pool_node pool_vector[MAX_POOL_SIZE];
2673 static int pool_size;
2674 static rtx pool_window_label;
2675 static int pool_window_last;
2677 /* ??? If we need a constant in HImode which is the truncated value of a
2678 constant we need in SImode, we could combine the two entries thus saving
2679 two bytes. Is this common enough to be worth the effort of implementing
2682 /* ??? This stuff should be done at the same time that we shorten branches.
2683 As it is now, we must assume that all branches are the maximum size, and
2684 this causes us to almost always output constant pools sooner than
2687 /* Add a constant to the pool and return its label. */
2690 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2693 rtx lab, new, ref, newref;
2695 /* First see if we've already got it. */
2696 for (i = 0; i < pool_size; i++)
2698 if (x->code == pool_vector[i].value->code
2699 && mode == pool_vector[i].mode)
2701 if (x->code == CODE_LABEL)
2703 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2706 if (rtx_equal_p (x, pool_vector[i].value))
2711 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2713 new = gen_label_rtx ();
2714 LABEL_REFS (new) = pool_vector[i].label;
2715 pool_vector[i].label = lab = new;
2717 if (lab && pool_window_label)
2719 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2720 ref = pool_vector[pool_window_last].wend;
2721 LABEL_NEXTREF (newref) = ref;
2722 pool_vector[pool_window_last].wend = newref;
2725 pool_window_label = new;
2726 pool_window_last = i;
2732 /* Need a new one. */
2733 pool_vector[pool_size].value = x;
2734 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2737 pool_vector[pool_size - 1].part_of_sequence_p = true;
2740 lab = gen_label_rtx ();
2741 pool_vector[pool_size].mode = mode;
2742 pool_vector[pool_size].label = lab;
2743 pool_vector[pool_size].wend = NULL_RTX;
2744 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2745 if (lab && pool_window_label)
2747 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2748 ref = pool_vector[pool_window_last].wend;
2749 LABEL_NEXTREF (newref) = ref;
2750 pool_vector[pool_window_last].wend = newref;
2753 pool_window_label = lab;
2754 pool_window_last = pool_size;
2759 /* Output the literal table. START, if nonzero, is the first instruction
2760 this table is needed for, and also indicates that there is at least one
2761 casesi_worker_2 instruction; We have to emit the operand3 labels from
2762 these insns at a 4-byte aligned position. BARRIER is the barrier
2763 after which we are to place the table. */
2766 dump_table (rtx start, rtx barrier)
2774 /* Do two passes, first time dump out the HI sized constants. */
2776 for (i = 0; i < pool_size; i++)
2778 pool_node *p = &pool_vector[i];
2780 if (p->mode == HImode)
2784 scan = emit_insn_after (gen_align_2 (), scan);
2787 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2788 scan = emit_label_after (lab, scan);
2789 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2791 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2793 lab = XEXP (ref, 0);
2794 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2797 else if (p->mode == DFmode)
2805 scan = emit_insn_after (gen_align_4 (), scan);
2807 for (; start != barrier; start = NEXT_INSN (start))
2808 if (GET_CODE (start) == INSN
2809 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2811 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2812 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2814 scan = emit_label_after (lab, scan);
2817 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2819 rtx align_insn = NULL_RTX;
2821 scan = emit_label_after (gen_label_rtx (), scan);
2822 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2825 for (i = 0; i < pool_size; i++)
2827 pool_node *p = &pool_vector[i];
2835 if (align_insn && !p->part_of_sequence_p)
2837 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2838 emit_label_before (lab, align_insn);
2839 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2841 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2843 lab = XEXP (ref, 0);
2844 emit_insn_before (gen_consttable_window_end (lab),
2847 delete_insn (align_insn);
2848 align_insn = NULL_RTX;
2853 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2854 scan = emit_label_after (lab, scan);
2855 scan = emit_insn_after (gen_consttable_4 (p->value,
2857 need_align = ! need_align;
2863 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2868 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2869 scan = emit_label_after (lab, scan);
2870 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2878 if (p->mode != HImode)
2880 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2882 lab = XEXP (ref, 0);
2883 scan = emit_insn_after (gen_consttable_window_end (lab),
2892 for (i = 0; i < pool_size; i++)
2894 pool_node *p = &pool_vector[i];
2905 scan = emit_label_after (gen_label_rtx (), scan);
2906 scan = emit_insn_after (gen_align_4 (), scan);
2908 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2909 scan = emit_label_after (lab, scan);
2910 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2918 scan = emit_label_after (gen_label_rtx (), scan);
2919 scan = emit_insn_after (gen_align_4 (), scan);
2921 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2922 scan = emit_label_after (lab, scan);
2923 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2931 if (p->mode != HImode)
2933 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2935 lab = XEXP (ref, 0);
2936 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2941 scan = emit_insn_after (gen_consttable_end (), scan);
2942 scan = emit_barrier_after (scan);
2944 pool_window_label = NULL_RTX;
2945 pool_window_last = 0;
2948 /* Return nonzero if constant would be an ok source for a
2949 mov.w instead of a mov.l. */
2954 return (GET_CODE (src) == CONST_INT
2955 && INTVAL (src) >= -32768
2956 && INTVAL (src) <= 32767);
2959 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2961 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2962 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2963 need to fix it if the input value is CONST_OK_FOR_I08. */
2966 broken_move (rtx insn)
2968 if (GET_CODE (insn) == INSN)
2970 rtx pat = PATTERN (insn);
2971 if (GET_CODE (pat) == PARALLEL)
2972 pat = XVECEXP (pat, 0, 0);
2973 if (GET_CODE (pat) == SET
2974 /* We can load any 8 bit value if we don't care what the high
2975 order bits end up as. */
2976 && GET_MODE (SET_DEST (pat)) != QImode
2977 && (CONSTANT_P (SET_SRC (pat))
2978 /* Match mova_const. */
2979 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2980 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2981 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2983 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2984 && (fp_zero_operand (SET_SRC (pat))
2985 || fp_one_operand (SET_SRC (pat)))
2986 /* ??? If this is a -m4 or -m4-single compilation, in general
2987 we don't know the current setting of fpscr, so disable fldi.
2988 There is an exception if this was a register-register move
2989 before reload - and hence it was ascertained that we have
2990 single precision setting - and in a post-reload optimization
2991 we changed this to do a constant load. In that case
2992 we don't have an r0 clobber, hence we must use fldi. */
2993 && (! TARGET_SH4 || TARGET_FMOVD
2994 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2996 && GET_CODE (SET_DEST (pat)) == REG
2997 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2998 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2999 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3009 return (GET_CODE (insn) == INSN
3010 && GET_CODE (PATTERN (insn)) == SET
3011 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3012 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3013 /* Don't match mova_const. */
3014 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3017 /* Fix up a mova from a switch that went out of range. */
3019 fixup_mova (rtx mova)
3023 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3024 INSN_CODE (mova) = -1;
3029 rtx lab = gen_label_rtx ();
3030 rtx wpat, wpat0, wpat1, wsrc, diff;
3034 worker = NEXT_INSN (worker);
3036 || GET_CODE (worker) == CODE_LABEL
3037 || GET_CODE (worker) == JUMP_INSN)
3039 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3040 wpat = PATTERN (worker);
3041 wpat0 = XVECEXP (wpat, 0, 0);
3042 wpat1 = XVECEXP (wpat, 0, 1);
3043 wsrc = SET_SRC (wpat0);
3044 PATTERN (worker) = (gen_casesi_worker_2
3045 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3046 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3048 INSN_CODE (worker) = -1;
3049 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3050 gen_rtx_LABEL_REF (Pmode, lab));
3051 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3052 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3053 INSN_CODE (mova) = -1;
3057 /* Find the last barrier from insn FROM which is close enough to hold the
3058 constant pool. If we can't find one, then create one near the end of
3062 find_barrier (int num_mova, rtx mova, rtx from)
3071 int leading_mova = num_mova;
3072 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3076 /* For HImode: range is 510, add 4 because pc counts from address of
3077 second instruction after this one, subtract 2 for the jump instruction
3078 that we may need to emit before the table, subtract 2 for the instruction
3079 that fills the jump delay slot (in very rare cases, reorg will take an
3080 instruction from after the constant pool or will leave the delay slot
3081 empty). This gives 510.
3082 For SImode: range is 1020, add 4 because pc counts from address of
3083 second instruction after this one, subtract 2 in case pc is 2 byte
3084 aligned, subtract 2 for the jump instruction that we may need to emit
3085 before the table, subtract 2 for the instruction that fills the jump
3086 delay slot. This gives 1018. */
3088 /* The branch will always be shortened now that the reference address for
3089 forward branches is the successor address, thus we need no longer make
3090 adjustments to the [sh]i_limit for -O0. */
3095 while (from && count_si < si_limit && count_hi < hi_limit)
3097 int inc = get_attr_length (from);
3100 if (GET_CODE (from) == CODE_LABEL)
3103 new_align = 1 << label_to_alignment (from);
3104 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3105 new_align = 1 << barrier_align (from);
3111 if (GET_CODE (from) == BARRIER)
3114 found_barrier = from;
3116 /* If we are at the end of the function, or in front of an alignment
3117 instruction, we need not insert an extra alignment. We prefer
3118 this kind of barrier. */
3119 if (barrier_align (from) > 2)
3120 good_barrier = from;
3123 if (broken_move (from))
3126 enum machine_mode mode;
3128 pat = PATTERN (from);
3129 if (GET_CODE (pat) == PARALLEL)
3130 pat = XVECEXP (pat, 0, 0);
3131 src = SET_SRC (pat);
3132 dst = SET_DEST (pat);
3133 mode = GET_MODE (dst);
3135 /* We must explicitly check the mode, because sometimes the
3136 front end will generate code to load unsigned constants into
3137 HImode targets without properly sign extending them. */
3139 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3142 /* We put the short constants before the long constants, so
3143 we must count the length of short constants in the range
3144 for the long constants. */
3145 /* ??? This isn't optimal, but is easy to do. */
3150 /* We dump DF/DI constants before SF/SI ones, because
3151 the limit is the same, but the alignment requirements
3152 are higher. We may waste up to 4 additional bytes
3153 for alignment, and the DF/DI constant may have
3154 another SF/SI constant placed before it. */
3155 if (TARGET_SHCOMPACT
3157 && (mode == DFmode || mode == DImode))
3162 while (si_align > 2 && found_si + si_align - 2 > count_si)
3164 if (found_si > count_si)
3165 count_si = found_si;
3166 found_si += GET_MODE_SIZE (mode);
3168 si_limit -= GET_MODE_SIZE (mode);
3171 /* See the code in machine_dependent_reorg, which has a similar if
3172 statement that generates a new mova insn in many cases. */
3173 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3183 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3185 if (found_si > count_si)
3186 count_si = found_si;
3188 else if (GET_CODE (from) == JUMP_INSN
3189 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3190 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3194 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3196 /* We have just passed the barrier in front of the
3197 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3198 the ADDR_DIFF_VEC is accessed as data, just like our pool
3199 constants, this is a good opportunity to accommodate what
3200 we have gathered so far.
3201 If we waited any longer, we could end up at a barrier in
3202 front of code, which gives worse cache usage for separated
3203 instruction / data caches. */
3204 good_barrier = found_barrier;
3209 rtx body = PATTERN (from);
3210 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3213 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3214 else if (GET_CODE (from) == JUMP_INSN
3216 && ! TARGET_SMALLCODE)
3222 if (new_align > si_align)
3224 si_limit -= (count_si - 1) & (new_align - si_align);
3225 si_align = new_align;
3227 count_si = (count_si + new_align - 1) & -new_align;
3232 if (new_align > hi_align)
3234 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3235 hi_align = new_align;
3237 count_hi = (count_hi + new_align - 1) & -new_align;
3239 from = NEXT_INSN (from);
3246 /* Try as we might, the leading mova is out of range. Change
3247 it into a load (which will become a pcload) and retry. */
3249 return find_barrier (0, 0, mova);
3253 /* Insert the constant pool table before the mova instruction,
3254 to prevent the mova label reference from going out of range. */
3256 good_barrier = found_barrier = barrier_before_mova;
3262 if (good_barrier && next_real_insn (found_barrier))
3263 found_barrier = good_barrier;
3267 /* We didn't find a barrier in time to dump our stuff,
3268 so we'll make one. */
3269 rtx label = gen_label_rtx ();
3271 /* If we exceeded the range, then we must back up over the last
3272 instruction we looked at. Otherwise, we just need to undo the
3273 NEXT_INSN at the end of the loop. */
3274 if (count_hi > hi_limit || count_si > si_limit)
3275 from = PREV_INSN (PREV_INSN (from));
3277 from = PREV_INSN (from);
3279 /* Walk back to be just before any jump or label.
3280 Putting it before a label reduces the number of times the branch
3281 around the constant pool table will be hit. Putting it before
3282 a jump makes it more likely that the bra delay slot will be
3284 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3285 || GET_CODE (from) == CODE_LABEL)
3286 from = PREV_INSN (from);
3288 from = emit_jump_insn_after (gen_jump (label), from);
3289 JUMP_LABEL (from) = label;
3290 LABEL_NUSES (label) = 1;
3291 found_barrier = emit_barrier_after (from);
3292 emit_label_after (label, found_barrier);
3295 return found_barrier;
3298 /* If the instruction INSN is implemented by a special function, and we can
3299 positively find the register that is used to call the sfunc, and this
3300 register is not used anywhere else in this instruction - except as the
3301 destination of a set, return this register; else, return 0. */
3303 sfunc_uses_reg (rtx insn)
3306 rtx pattern, part, reg_part, reg;
3308 if (GET_CODE (insn) != INSN)
3310 pattern = PATTERN (insn);
3311 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3314 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3316 part = XVECEXP (pattern, 0, i);
3317 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3322 reg = XEXP (reg_part, 0);
3323 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3325 part = XVECEXP (pattern, 0, i);
3326 if (part == reg_part || GET_CODE (part) == CLOBBER)
3328 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3329 && GET_CODE (SET_DEST (part)) == REG)
3330 ? SET_SRC (part) : part)))
3336 /* See if the only way in which INSN uses REG is by calling it, or by
3337 setting it while calling it. Set *SET to a SET rtx if the register
3341 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3347 reg2 = sfunc_uses_reg (insn);
3348 if (reg2 && REGNO (reg2) == REGNO (reg))
3350 pattern = single_set (insn);
3352 && GET_CODE (SET_DEST (pattern)) == REG
3353 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3357 if (GET_CODE (insn) != CALL_INSN)
3359 /* We don't use rtx_equal_p because we don't care if the mode is
3361 pattern = single_set (insn);
3363 && GET_CODE (SET_DEST (pattern)) == REG
3364 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3370 par = PATTERN (insn);
3371 if (GET_CODE (par) == PARALLEL)
3372 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3374 part = XVECEXP (par, 0, i);
3375 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3378 return reg_mentioned_p (reg, SET_SRC (pattern));
3384 pattern = PATTERN (insn);
3386 if (GET_CODE (pattern) == PARALLEL)
3390 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3391 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3393 pattern = XVECEXP (pattern, 0, 0);
3396 if (GET_CODE (pattern) == SET)
3398 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3400 /* We don't use rtx_equal_p, because we don't care if the
3401 mode is different. */
3402 if (GET_CODE (SET_DEST (pattern)) != REG
3403 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3409 pattern = SET_SRC (pattern);
3412 if (GET_CODE (pattern) != CALL
3413 || GET_CODE (XEXP (pattern, 0)) != MEM
3414 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3420 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3421 general registers. Bits 0..15 mean that the respective registers
3422 are used as inputs in the instruction. Bits 16..31 mean that the
3423 registers 0..15, respectively, are used as outputs, or are clobbered.
3424 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3426 regs_used (rtx x, int is_dest)
3434 code = GET_CODE (x);
3439 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3440 << (REGNO (x) + is_dest));
3444 rtx y = SUBREG_REG (x);
3446 if (GET_CODE (y) != REG)
3449 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3451 subreg_regno_offset (REGNO (y),
3454 GET_MODE (x)) + is_dest));
3458 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3460 /* If there was a return value, it must have been indicated with USE. */
3475 fmt = GET_RTX_FORMAT (code);
3477 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3482 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3483 used |= regs_used (XVECEXP (x, i, j), is_dest);
3485 else if (fmt[i] == 'e')
3486 used |= regs_used (XEXP (x, i), is_dest);
3491 /* Create an instruction that prevents redirection of a conditional branch
3492 to the destination of the JUMP with address ADDR.
3493 If the branch needs to be implemented as an indirect jump, try to find
3494 a scratch register for it.
3495 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3496 If any preceding insn that doesn't fit into a delay slot is good enough,
3497 pass 1. Pass 2 if a definite blocking insn is needed.
3498 -1 is used internally to avoid deep recursion.
3499 If a blocking instruction is made or recognized, return it. */
3502 gen_block_redirect (rtx jump, int addr, int need_block)
3505 rtx prev = prev_nonnote_insn (jump);
3508 /* First, check if we already have an instruction that satisfies our need. */
3509 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3511 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3513 if (GET_CODE (PATTERN (prev)) == USE
3514 || GET_CODE (PATTERN (prev)) == CLOBBER
3515 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3517 else if ((need_block &= ~1) < 0)
3519 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3522 if (GET_CODE (PATTERN (jump)) == RETURN)
3526 /* Reorg even does nasty things with return insns that cause branches
3527 to go out of range - see find_end_label and callers. */
3528 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3530 /* We can't use JUMP_LABEL here because it might be undefined
3531 when not optimizing. */
3532 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3533 /* If the branch is out of range, try to find a scratch register for it. */
3535 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3539 /* Don't look for the stack pointer as a scratch register,
3540 it would cause trouble if an interrupt occurred. */
3541 unsigned try = 0x7fff, used;
3542 int jump_left = flag_expensive_optimizations + 1;
3544 /* It is likely that the most recent eligible instruction is wanted for
3545 the delay slot. Therefore, find out which registers it uses, and
3546 try to avoid using them. */
3548 for (scan = jump; (scan = PREV_INSN (scan)); )
3552 if (INSN_DELETED_P (scan))
3554 code = GET_CODE (scan);
3555 if (code == CODE_LABEL || code == JUMP_INSN)
3558 && GET_CODE (PATTERN (scan)) != USE
3559 && GET_CODE (PATTERN (scan)) != CLOBBER
3560 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3562 try &= ~regs_used (PATTERN (scan), 0);
3566 for (used = dead = 0, scan = JUMP_LABEL (jump);
3567 (scan = NEXT_INSN (scan)); )
3571 if (INSN_DELETED_P (scan))
3573 code = GET_CODE (scan);
3576 used |= regs_used (PATTERN (scan), 0);
3577 if (code == CALL_INSN)
3578 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3579 dead |= (used >> 16) & ~used;
3585 if (code == JUMP_INSN)
3587 if (jump_left-- && simplejump_p (scan))
3588 scan = JUMP_LABEL (scan);
3594 /* Mask out the stack pointer again, in case it was
3595 the only 'free' register we have found. */
3598 /* If the immediate destination is still in range, check for possible
3599 threading with a jump beyond the delay slot insn.
3600 Don't check if we are called recursively; the jump has been or will be
3601 checked in a different invocation then. */
3603 else if (optimize && need_block >= 0)
3605 rtx next = next_active_insn (next_active_insn (dest));
3606 if (next && GET_CODE (next) == JUMP_INSN
3607 && GET_CODE (PATTERN (next)) == SET
3608 && recog_memoized (next) == CODE_FOR_jump_compact)
3610 dest = JUMP_LABEL (next);
3612 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3614 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3620 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3622 /* It would be nice if we could convert the jump into an indirect
3623 jump / far branch right now, and thus exposing all constituent
3624 instructions to further optimization. However, reorg uses
3625 simplejump_p to determine if there is an unconditional jump where
3626 it should try to schedule instructions from the target of the
3627 branch; simplejump_p fails for indirect jumps even if they have
3629 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3630 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3632 /* ??? We would like this to have the scope of the jump, but that
3633 scope will change when a delay slot insn of an inner scope is added.
3634 Hence, after delay slot scheduling, we'll have to expect
3635 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3638 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3639 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3642 else if (need_block)
3643 /* We can't use JUMP_LABEL here because it might be undefined
3644 when not optimizing. */
3645 return emit_insn_before (gen_block_branch_redirect
3646 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3651 #define CONDJUMP_MIN -252
3652 #define CONDJUMP_MAX 262
3655 /* A label (to be placed) in front of the jump
3656 that jumps to our ultimate destination. */
3658 /* Where we are going to insert it if we cannot move the jump any farther,
3659 or the jump itself if we have picked up an existing jump. */
3661 /* The ultimate destination. */
3663 struct far_branch *prev;
3664 /* If the branch has already been created, its address;
3665 else the address of its first prospective user. */
3669 static void gen_far_branch (struct far_branch *);
3670 enum mdep_reorg_phase_e mdep_reorg_phase;
3672 gen_far_branch (struct far_branch *bp)
3674 rtx insn = bp->insert_place;
3676 rtx label = gen_label_rtx ();
3678 emit_label_after (label, insn);
3681 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3682 LABEL_NUSES (bp->far_label)++;
3685 jump = emit_jump_insn_after (gen_return (), insn);
3686 /* Emit a barrier so that reorg knows that any following instructions
3687 are not reachable via a fall-through path.
3688 But don't do this when not optimizing, since we wouldn't suppress the
3689 alignment for the barrier then, and could end up with out-of-range
3690 pc-relative loads. */
3692 emit_barrier_after (jump);
3693 emit_label_after (bp->near_label, insn);
3694 JUMP_LABEL (jump) = bp->far_label;
3695 if (! invert_jump (insn, label, 1))
3697 /* If we are branching around a jump (rather than a return), prevent
3698 reorg from using an insn from the jump target as the delay slot insn -
3699 when reorg did this, it pessimized code (we rather hide the delay slot)
3700 and it could cause branches to go out of range. */
3703 (gen_stuff_delay_slot
3704 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3705 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3707 /* Prevent reorg from undoing our splits. */
3708 gen_block_redirect (jump, bp->address += 2, 2);
3711 /* Fix up ADDR_DIFF_VECs. */
3713 fixup_addr_diff_vecs (rtx first)
3717 for (insn = first; insn; insn = NEXT_INSN (insn))
3719 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3721 if (GET_CODE (insn) != JUMP_INSN
3722 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3724 pat = PATTERN (insn);
3725 vec_lab = XEXP (XEXP (pat, 0), 0);
3727 /* Search the matching casesi_jump_2. */
3728 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3730 if (GET_CODE (prev) != JUMP_INSN)
3732 prevpat = PATTERN (prev);
3733 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3735 x = XVECEXP (prevpat, 0, 1);
3736 if (GET_CODE (x) != USE)
3739 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3743 /* Emit the reference label of the braf where it belongs, right after
3744 the casesi_jump_2 (i.e. braf). */
3745 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3746 emit_label_after (braf_label, prev);
3748 /* Fix up the ADDR_DIF_VEC to be relative
3749 to the reference address of the braf. */
3750 XEXP (XEXP (pat, 0), 0) = braf_label;
3754 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3755 a barrier. Return the base 2 logarithm of the desired alignment. */
3757 barrier_align (rtx barrier_or_label)
3759 rtx next = next_real_insn (barrier_or_label), pat, prev;
3760 int slot, credit, jump_to_next = 0;
3765 pat = PATTERN (next);
3767 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3770 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3771 /* This is a barrier in front of a constant table. */
3774 prev = prev_real_insn (barrier_or_label);
3775 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3777 pat = PATTERN (prev);
3778 /* If this is a very small table, we want to keep the alignment after
3779 the table to the minimum for proper code alignment. */
3780 return ((TARGET_SMALLCODE
3781 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3782 <= (unsigned) 1 << (CACHE_LOG - 2)))
3783 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3786 if (TARGET_SMALLCODE)
3789 if (! TARGET_SH2 || ! optimize)
3790 return align_jumps_log;
3792 /* When fixing up pcloads, a constant table might be inserted just before
3793 the basic block that ends with the barrier. Thus, we can't trust the
3794 instruction lengths before that. */
3795 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3797 /* Check if there is an immediately preceding branch to the insn beyond
3798 the barrier. We must weight the cost of discarding useful information
3799 from the current cache line when executing this branch and there is
3800 an alignment, against that of fetching unneeded insn in front of the
3801 branch target when there is no alignment. */
3803 /* There are two delay_slot cases to consider. One is the simple case
3804 where the preceding branch is to the insn beyond the barrier (simple
3805 delay slot filling), and the other is where the preceding branch has
3806 a delay slot that is a duplicate of the insn after the barrier
3807 (fill_eager_delay_slots) and the branch is to the insn after the insn
3808 after the barrier. */
3810 /* PREV is presumed to be the JUMP_INSN for the barrier under
3811 investigation. Skip to the insn before it. */
3812 prev = prev_real_insn (prev);
3814 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3815 credit >= 0 && prev && GET_CODE (prev) == INSN;
3816 prev = prev_real_insn (prev))
3819 if (GET_CODE (PATTERN (prev)) == USE
3820 || GET_CODE (PATTERN (prev)) == CLOBBER)
3822 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3824 prev = XVECEXP (PATTERN (prev), 0, 1);
3825 if (INSN_UID (prev) == INSN_UID (next))
3827 /* Delay slot was filled with insn at jump target. */
3834 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3836 credit -= get_attr_length (prev);
3839 && GET_CODE (prev) == JUMP_INSN
3840 && JUMP_LABEL (prev))
3844 || next_real_insn (JUMP_LABEL (prev)) == next
3845 /* If relax_delay_slots() decides NEXT was redundant
3846 with some previous instruction, it will have
3847 redirected PREV's jump to the following insn. */
3848 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3849 /* There is no upper bound on redundant instructions
3850 that might have been skipped, but we must not put an
3851 alignment where none had been before. */
3852 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3854 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3855 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3856 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3858 rtx pat = PATTERN (prev);
3859 if (GET_CODE (pat) == PARALLEL)
3860 pat = XVECEXP (pat, 0, 0);
3861 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3867 return align_jumps_log;
3870 /* If we are inside a phony loop, almost any kind of label can turn up as the
3871 first one in the loop. Aligning a braf label causes incorrect switch
3872 destination addresses; we can detect braf labels because they are
3873 followed by a BARRIER.
3874 Applying loop alignment to small constant or switch tables is a waste
3875 of space, so we suppress this too. */
3877 sh_loop_align (rtx label)
3882 next = next_nonnote_insn (next);
3883 while (next && GET_CODE (next) == CODE_LABEL);
3887 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3888 || recog_memoized (next) == CODE_FOR_consttable_2)
3891 return align_loops_log;
3894 /* Do a final pass over the function, just before delayed branch
3900 rtx first, insn, mova = NULL_RTX;
3902 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3903 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3905 first = get_insns ();
3907 /* We must split call insns before introducing `mova's. If we're
3908 optimizing, they'll have already been split. Otherwise, make
3909 sure we don't split them too late. */
3911 split_all_insns_noflow ();
3916 /* If relaxing, generate pseudo-ops to associate function calls with
3917 the symbols they call. It does no harm to not generate these
3918 pseudo-ops. However, when we can generate them, it enables to
3919 linker to potentially relax the jsr to a bsr, and eliminate the
3920 register load and, possibly, the constant pool entry. */
3922 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3925 /* Remove all REG_LABEL notes. We want to use them for our own
3926 purposes. This works because none of the remaining passes
3927 need to look at them.
3929 ??? But it may break in the future. We should use a machine
3930 dependent REG_NOTE, or some other approach entirely. */
3931 for (insn = first; insn; insn = NEXT_INSN (insn))
3937 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3938 remove_note (insn, note);
3942 for (insn = first; insn; insn = NEXT_INSN (insn))
3944 rtx pattern, reg, link, set, scan, dies, label;
3945 int rescan = 0, foundinsn = 0;
3947 if (GET_CODE (insn) == CALL_INSN)
3949 pattern = PATTERN (insn);
3951 if (GET_CODE (pattern) == PARALLEL)
3952 pattern = XVECEXP (pattern, 0, 0);
3953 if (GET_CODE (pattern) == SET)
3954 pattern = SET_SRC (pattern);
3956 if (GET_CODE (pattern) != CALL
3957 || GET_CODE (XEXP (pattern, 0)) != MEM)
3960 reg = XEXP (XEXP (pattern, 0), 0);
3964 reg = sfunc_uses_reg (insn);
3969 if (GET_CODE (reg) != REG)
3972 /* This is a function call via REG. If the only uses of REG
3973 between the time that it is set and the time that it dies
3974 are in function calls, then we can associate all the
3975 function calls with the setting of REG. */
3977 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3979 if (REG_NOTE_KIND (link) != 0)
3981 set = single_set (XEXP (link, 0));
3982 if (set && rtx_equal_p (reg, SET_DEST (set)))
3984 link = XEXP (link, 0);
3991 /* ??? Sometimes global register allocation will have
3992 deleted the insn pointed to by LOG_LINKS. Try
3993 scanning backward to find where the register is set. */
3994 for (scan = PREV_INSN (insn);
3995 scan && GET_CODE (scan) != CODE_LABEL;
3996 scan = PREV_INSN (scan))
3998 if (! INSN_P (scan))
4001 if (! reg_mentioned_p (reg, scan))
4004 if (noncall_uses_reg (reg, scan, &set))
4018 /* The register is set at LINK. */
4020 /* We can only optimize the function call if the register is
4021 being set to a symbol. In theory, we could sometimes
4022 optimize calls to a constant location, but the assembler
4023 and linker do not support that at present. */
4024 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4025 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4028 /* Scan forward from LINK to the place where REG dies, and
4029 make sure that the only insns which use REG are
4030 themselves function calls. */
4032 /* ??? This doesn't work for call targets that were allocated
4033 by reload, since there may not be a REG_DEAD note for the
4037 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4041 /* Don't try to trace forward past a CODE_LABEL if we haven't
4042 seen INSN yet. Ordinarily, we will only find the setting insn
4043 in LOG_LINKS if it is in the same basic block. However,
4044 cross-jumping can insert code labels in between the load and
4045 the call, and can result in situations where a single call
4046 insn may have two targets depending on where we came from. */
4048 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4051 if (! INSN_P (scan))
4054 /* Don't try to trace forward past a JUMP. To optimize
4055 safely, we would have to check that all the
4056 instructions at the jump destination did not use REG. */
4058 if (GET_CODE (scan) == JUMP_INSN)
4061 if (! reg_mentioned_p (reg, scan))
4064 if (noncall_uses_reg (reg, scan, &scanset))
4071 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4073 /* There is a function call to this register other
4074 than the one we are checking. If we optimize
4075 this call, we need to rescan again below. */
4079 /* ??? We shouldn't have to worry about SCANSET here.
4080 We should just be able to check for a REG_DEAD note
4081 on a function call. However, the REG_DEAD notes are
4082 apparently not dependable around libcalls; c-torture
4083 execute/920501-2 is a test case. If SCANSET is set,
4084 then this insn sets the register, so it must have
4085 died earlier. Unfortunately, this will only handle
4086 the cases in which the register is, in fact, set in a
4089 /* ??? We shouldn't have to use FOUNDINSN here.
4090 However, the LOG_LINKS fields are apparently not
4091 entirely reliable around libcalls;
4092 newlib/libm/math/e_pow.c is a test case. Sometimes
4093 an insn will appear in LOG_LINKS even though it is
4094 not the most recent insn which sets the register. */
4098 || find_reg_note (scan, REG_DEAD, reg)))
4107 /* Either there was a branch, or some insn used REG
4108 other than as a function call address. */
4112 /* Create a code label, and put it in a REG_LABEL note on
4113 the insn which sets the register, and on each call insn
4114 which uses the register. In final_prescan_insn we look
4115 for the REG_LABEL notes, and output the appropriate label
4118 label = gen_label_rtx ();
4119 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4121 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4130 scan = NEXT_INSN (scan);
4132 && ((GET_CODE (scan) == CALL_INSN
4133 && reg_mentioned_p (reg, scan))
4134 || ((reg2 = sfunc_uses_reg (scan))
4135 && REGNO (reg2) == REGNO (reg))))
4137 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4139 while (scan != dies);
4145 fixup_addr_diff_vecs (first);
4149 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4150 shorten_branches (first);
4152 /* Scan the function looking for move instructions which have to be
4153 changed to pc-relative loads and insert the literal tables. */
4155 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4156 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4160 /* ??? basic block reordering can move a switch table dispatch
4161 below the switch table. Check if that has happened.
4162 We only have the addresses available when optimizing; but then,
4163 this check shouldn't be needed when not optimizing. */
4164 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4166 && (INSN_ADDRESSES (INSN_UID (insn))
4167 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4169 /* Change the mova into a load.
4170 broken_move will then return true for it. */
4173 else if (! num_mova++)
4176 else if (GET_CODE (insn) == JUMP_INSN
4177 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4185 /* Some code might have been inserted between the mova and
4186 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4187 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4188 total += get_attr_length (scan);
4190 /* range of mova is 1020, add 4 because pc counts from address of
4191 second instruction after this one, subtract 2 in case pc is 2
4192 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4193 cancels out with alignment effects of the mova itself. */
4196 /* Change the mova into a load, and restart scanning
4197 there. broken_move will then return true for mova. */
4202 if (broken_move (insn)
4203 || (GET_CODE (insn) == INSN
4204 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4207 /* Scan ahead looking for a barrier to stick the constant table
4209 rtx barrier = find_barrier (num_mova, mova, insn);
4210 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4211 int need_aligned_label = 0;
4213 if (num_mova && ! mova_p (mova))
4215 /* find_barrier had to change the first mova into a
4216 pcload; thus, we have to start with this new pcload. */
4220 /* Now find all the moves between the points and modify them. */
4221 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4223 if (GET_CODE (scan) == CODE_LABEL)
4225 if (GET_CODE (scan) == INSN
4226 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4227 need_aligned_label = 1;
4228 if (broken_move (scan))
4230 rtx *patp = &PATTERN (scan), pat = *patp;
4234 enum machine_mode mode;
4236 if (GET_CODE (pat) == PARALLEL)
4237 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4238 src = SET_SRC (pat);
4239 dst = SET_DEST (pat);
4240 mode = GET_MODE (dst);
4242 if (mode == SImode && hi_const (src)
4243 && REGNO (dst) != FPUL_REG)
4248 while (GET_CODE (dst) == SUBREG)
4250 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4251 GET_MODE (SUBREG_REG (dst)),
4254 dst = SUBREG_REG (dst);
4256 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4258 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4260 /* This must be an insn that clobbers r0. */
4261 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4262 XVECLEN (PATTERN (scan), 0)
4264 rtx clobber = *clobberp;
4266 if (GET_CODE (clobber) != CLOBBER
4267 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4271 && reg_set_between_p (r0_rtx, last_float_move, scan))
4275 && GET_MODE_SIZE (mode) != 4
4276 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4278 lab = add_constant (src, mode, last_float);
4280 emit_insn_before (gen_mova (lab), scan);
4283 /* There will be a REG_UNUSED note for r0 on
4284 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4285 lest reorg:mark_target_live_regs will not
4286 consider r0 to be used, and we end up with delay
4287 slot insn in front of SCAN that clobbers r0. */
4289 = find_regno_note (last_float_move, REG_UNUSED, 0);
4291 /* If we are not optimizing, then there may not be
4294 PUT_MODE (note, REG_INC);
4296 *last_float_addr = r0_inc_rtx;
4298 last_float_move = scan;
4300 newsrc = gen_rtx_MEM (mode,
4301 (((TARGET_SH4 && ! TARGET_FMOVD)
4302 || REGNO (dst) == FPUL_REG)
4305 last_float_addr = &XEXP (newsrc, 0);
4307 /* Remove the clobber of r0. */
4308 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4309 gen_rtx_SCRATCH (Pmode));
4310 RTX_UNCHANGING_P (newsrc) = 1;
4312 /* This is a mova needing a label. Create it. */
4313 else if (GET_CODE (src) == UNSPEC
4314 && XINT (src, 1) == UNSPEC_MOVA
4315 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4317 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4318 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4319 newsrc = gen_rtx_UNSPEC (SImode,
4320 gen_rtvec (1, newsrc),
4325 lab = add_constant (src, mode, 0);
4326 newsrc = gen_rtx_MEM (mode,
4327 gen_rtx_LABEL_REF (VOIDmode, lab));
4328 RTX_UNCHANGING_P (newsrc) = 1;
4330 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4331 INSN_CODE (scan) = -1;
4334 dump_table (need_aligned_label ? insn : 0, barrier);
4339 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4340 INSN_ADDRESSES_FREE ();
4341 split_branches (first);
4343 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4344 also has an effect on the register that holds the address of the sfunc.
4345 Insert an extra dummy insn in front of each sfunc that pretends to
4346 use this register. */
4347 if (flag_delayed_branch)
4349 for (insn = first; insn; insn = NEXT_INSN (insn))
4351 rtx reg = sfunc_uses_reg (insn);
4355 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4359 /* fpscr is not actually a user variable, but we pretend it is for the
4360 sake of the previous optimization passes, since we want it handled like
4361 one. However, we don't have any debugging information for it, so turn
4362 it into a non-user variable now. */
4364 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4366 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4370 get_dest_uid (rtx label, int max_uid)
4372 rtx dest = next_real_insn (label);
4375 /* This can happen for an undefined label. */
4377 dest_uid = INSN_UID (dest);
4378 /* If this is a newly created branch redirection blocking instruction,
4379 we cannot index the branch_uid or insn_addresses arrays with its
4380 uid. But then, we won't need to, because the actual destination is
4381 the following branch. */
4382 while (dest_uid >= max_uid)
4384 dest = NEXT_INSN (dest);
4385 dest_uid = INSN_UID (dest);
4387 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4392 /* Split condbranches that are out of range. Also add clobbers for
4393 scratch registers that are needed in far jumps.
4394 We do this before delay slot scheduling, so that it can take our
4395 newly created instructions into account. It also allows us to
4396 find branches with common targets more easily. */
4399 split_branches (rtx first)
4402 struct far_branch **uid_branch, *far_branch_list = 0;
4403 int max_uid = get_max_uid ();
4405 /* Find out which branches are out of range. */
4406 shorten_branches (first);
4408 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4409 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4411 for (insn = first; insn; insn = NEXT_INSN (insn))
4412 if (! INSN_P (insn))
4414 else if (INSN_DELETED_P (insn))
4416 /* Shorten_branches would split this instruction again,
4417 so transform it into a note. */
4418 PUT_CODE (insn, NOTE);
4419 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4420 NOTE_SOURCE_FILE (insn) = 0;
4422 else if (GET_CODE (insn) == JUMP_INSN
4423 /* Don't mess with ADDR_DIFF_VEC */
4424 && (GET_CODE (PATTERN (insn)) == SET
4425 || GET_CODE (PATTERN (insn)) == RETURN))
4427 enum attr_type type = get_attr_type (insn);
4428 if (type == TYPE_CBRANCH)
4432 if (get_attr_length (insn) > 4)
4434 rtx src = SET_SRC (PATTERN (insn));
4435 rtx olabel = XEXP (XEXP (src, 1), 0);
4436 int addr = INSN_ADDRESSES (INSN_UID (insn));
4438 int dest_uid = get_dest_uid (olabel, max_uid);
4439 struct far_branch *bp = uid_branch[dest_uid];
4441 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4442 the label if the LABEL_NUSES count drops to zero. There is
4443 always a jump_optimize pass that sets these values, but it
4444 proceeds to delete unreferenced code, and then if not
4445 optimizing, to un-delete the deleted instructions, thus
4446 leaving labels with too low uses counts. */
4449 JUMP_LABEL (insn) = olabel;
4450 LABEL_NUSES (olabel)++;
4454 bp = (struct far_branch *) alloca (sizeof *bp);
4455 uid_branch[dest_uid] = bp;
4456 bp->prev = far_branch_list;
4457 far_branch_list = bp;
4459 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4460 LABEL_NUSES (bp->far_label)++;
4464 label = bp->near_label;
4465 if (! label && bp->address - addr >= CONDJUMP_MIN)
4467 rtx block = bp->insert_place;
4469 if (GET_CODE (PATTERN (block)) == RETURN)
4470 block = PREV_INSN (block);
4472 block = gen_block_redirect (block,
4474 label = emit_label_after (gen_label_rtx (),
4476 bp->near_label = label;
4478 else if (label && ! NEXT_INSN (label))
4480 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4481 bp->insert_place = insn;
4483 gen_far_branch (bp);
4487 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4489 bp->near_label = label = gen_label_rtx ();
4490 bp->insert_place = insn;
4493 if (! redirect_jump (insn, label, 1))
4498 /* get_attr_length (insn) == 2 */
4499 /* Check if we have a pattern where reorg wants to redirect
4500 the branch to a label from an unconditional branch that
4502 /* We can't use JUMP_LABEL here because it might be undefined
4503 when not optimizing. */
4504 /* A syntax error might cause beyond to be NULL_RTX. */
4506 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4510 && (GET_CODE (beyond) == JUMP_INSN
4511 || ((beyond = next_active_insn (beyond))
4512 && GET_CODE (beyond) == JUMP_INSN))
4513 && GET_CODE (PATTERN (beyond)) == SET
4514 && recog_memoized (beyond) == CODE_FOR_jump_compact
4516 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4517 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4519 gen_block_redirect (beyond,
4520 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4523 next = next_active_insn (insn);
4525 if ((GET_CODE (next) == JUMP_INSN
4526 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4527 && GET_CODE (PATTERN (next)) == SET
4528 && recog_memoized (next) == CODE_FOR_jump_compact
4530 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4531 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4533 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4535 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4537 int addr = INSN_ADDRESSES (INSN_UID (insn));
4540 struct far_branch *bp;
4542 if (type == TYPE_JUMP)
4544 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4545 dest_uid = get_dest_uid (far_label, max_uid);
4548 /* Parse errors can lead to labels outside
4550 if (! NEXT_INSN (far_label))
4555 JUMP_LABEL (insn) = far_label;
4556 LABEL_NUSES (far_label)++;
4558 redirect_jump (insn, NULL_RTX, 1);
4562 bp = uid_branch[dest_uid];
4565 bp = (struct far_branch *) alloca (sizeof *bp);
4566 uid_branch[dest_uid] = bp;
4567 bp->prev = far_branch_list;
4568 far_branch_list = bp;
4570 bp->far_label = far_label;
4572 LABEL_NUSES (far_label)++;
4574 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4575 if (addr - bp->address <= CONDJUMP_MAX)
4576 emit_label_after (bp->near_label, PREV_INSN (insn));
4579 gen_far_branch (bp);
4585 bp->insert_place = insn;
4587 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4589 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4592 /* Generate all pending far branches,
4593 and free our references to the far labels. */
4594 while (far_branch_list)
4596 if (far_branch_list->near_label
4597 && ! NEXT_INSN (far_branch_list->near_label))
4598 gen_far_branch (far_branch_list);
4600 && far_branch_list->far_label
4601 && ! --LABEL_NUSES (far_branch_list->far_label))
4602 delete_insn (far_branch_list->far_label);
4603 far_branch_list = far_branch_list->prev;
4606 /* Instruction length information is no longer valid due to the new
4607 instructions that have been generated. */
4608 init_insn_lengths ();
4611 /* Dump out instruction addresses, which is useful for debugging the
4612 constant pool table stuff.
4614 If relaxing, output the label and pseudo-ops used to link together
4615 calls and the instruction which set the registers. */
4617 /* ??? The addresses printed by this routine for insns are nonsense for
4618 insns which are inside of a sequence where none of the inner insns have
4619 variable length. This is because the second pass of shorten_branches
4620 does not bother to update them. */
4623 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4624 int noperands ATTRIBUTE_UNUSED)
4626 if (TARGET_DUMPISIZE)
4627 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4633 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4638 pattern = PATTERN (insn);
4639 if (GET_CODE (pattern) == PARALLEL)
4640 pattern = XVECEXP (pattern, 0, 0);
4641 if (GET_CODE (pattern) == CALL
4642 || (GET_CODE (pattern) == SET
4643 && (GET_CODE (SET_SRC (pattern)) == CALL
4644 || get_attr_type (insn) == TYPE_SFUNC)))
4645 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4646 CODE_LABEL_NUMBER (XEXP (note, 0)));
4647 else if (GET_CODE (pattern) == SET)
4648 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4649 CODE_LABEL_NUMBER (XEXP (note, 0)));
4656 /* Dump out any constants accumulated in the final pass. These will
4660 output_jump_label_table (void)
4666 fprintf (asm_out_file, "\t.align 2\n");
4667 for (i = 0; i < pool_size; i++)
4669 pool_node *p = &pool_vector[i];
4671 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4672 CODE_LABEL_NUMBER (p->label));
4673 output_asm_insn (".long %O0", &p->value);
4681 /* A full frame looks like:
4685 [ if current_function_anonymous_args
4698 local-0 <- fp points here. */
4700 /* Number of bytes pushed for anonymous args, used to pass information
4701 between expand_prologue and expand_epilogue. */
4703 static int extra_push;
4705 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4706 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4707 for an epilogue and a negative value means that it's for a sibcall
4708 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4709 all the registers that are about to be restored, and hence dead. */
4712 output_stack_adjust (int size, rtx reg, int epilogue_p,
4713 HARD_REG_SET *live_regs_mask)
4715 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4718 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4723 if (CONST_OK_FOR_ADD (size))
4724 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4725 /* Try to do it with two partial adjustments; however, we must make
4726 sure that the stack is properly aligned at all times, in case
4727 an interrupt occurs between the two partial adjustments. */
4728 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4729 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4731 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4732 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4738 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4741 /* If TEMP is invalid, we could temporarily save a general
4742 register to MACL. However, there is currently no need
4743 to handle this case, so just abort when we see it. */
4745 || current_function_interrupt
4746 || ! call_used_regs[temp] || fixed_regs[temp])
4748 if (temp < 0 && ! current_function_interrupt
4749 && (TARGET_SHMEDIA || epilogue_p >= 0))
4752 COPY_HARD_REG_SET (temps, call_used_reg_set);
4753 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4757 if (current_function_return_rtx)
4759 enum machine_mode mode;
4760 mode = GET_MODE (current_function_return_rtx);
4761 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4762 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4764 for (i = 0; i < nreg; i++)
4765 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4766 if (current_function_calls_eh_return)
4768 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4769 for (i = 0; i <= 3; i++)
4770 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4773 if (TARGET_SHMEDIA && epilogue_p < 0)
4774 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4775 CLEAR_HARD_REG_BIT (temps, i);
4776 if (epilogue_p <= 0)
4778 for (i = FIRST_PARM_REG;
4779 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4780 CLEAR_HARD_REG_BIT (temps, i);
4781 if (cfun->static_chain_decl != NULL)
4782 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4784 temp = scavenge_reg (&temps);
4786 if (temp < 0 && live_regs_mask)
4787 temp = scavenge_reg (live_regs_mask);
4790 /* If we reached here, the most likely case is the (sibcall)
4791 epilogue for non SHmedia. Put a special push/pop sequence
4792 for such case as the last resort. This looks lengthy but
4793 would not be problem because it seems to be very rare. */
4794 if (! TARGET_SHMEDIA && epilogue_p)
4796 rtx adj_reg, tmp_reg, mem;
4798 /* ??? There is still the slight possibility that r4 or r5
4799 have been reserved as fixed registers or assigned as
4800 global registers, and they change during an interrupt.
4801 There are possible ways to handle this:
4802 - If we are adjusting the frame pointer (r14), we can do
4803 with a single temp register and an ordinary push / pop
4805 - Grab any call-used or call-saved registers (i.e. not
4806 fixed or globals) for the temps we need. We might
4807 also grab r14 if we are adjusting the stack pointer.
4808 If we can't find enough available registers, issue
4809 a diagnostic and abort - the user must have reserved
4810 way too many registers.
4811 But since all this is rather unlikely to happen and
4812 would require extra testing, we just abort if r4 / r5
4813 are not available. */
4814 if (fixed_regs[4] || fixed_regs[5]
4815 || global_regs[4] || global_regs[5])
4818 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4819 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4820 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4821 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4822 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4823 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4824 emit_move_insn (mem, tmp_reg);
4825 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4826 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4827 emit_move_insn (mem, tmp_reg);
4828 emit_move_insn (reg, adj_reg);
4829 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4830 emit_move_insn (adj_reg, mem);
4831 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4832 emit_move_insn (tmp_reg, mem);
4838 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4840 /* If SIZE is negative, subtract the positive value.
4841 This sometimes allows a constant pool entry to be shared
4842 between prologue and epilogue code. */
4845 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4846 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4850 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4851 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4855 = (gen_rtx_EXPR_LIST
4856 (REG_FRAME_RELATED_EXPR,
4857 gen_rtx_SET (VOIDmode, reg,
4858 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4868 RTX_FRAME_RELATED_P (x) = 1;
4872 /* Output RTL to push register RN onto the stack. */
4879 x = gen_push_fpul ();
4880 else if (rn == FPSCR_REG)
4881 x = gen_push_fpscr ();
4882 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4883 && FP_OR_XD_REGISTER_P (rn))
4885 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4887 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4889 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4890 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4892 x = gen_push (gen_rtx_REG (SImode, rn));
4896 = gen_rtx_EXPR_LIST (REG_INC,
4897 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4901 /* Output RTL to pop register RN from the stack. */
4908 x = gen_pop_fpul ();
4909 else if (rn == FPSCR_REG)
4910 x = gen_pop_fpscr ();
4911 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4912 && FP_OR_XD_REGISTER_P (rn))
4914 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4916 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4918 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4919 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4921 x = gen_pop (gen_rtx_REG (SImode, rn));
4925 = gen_rtx_EXPR_LIST (REG_INC,
4926 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4929 /* Generate code to push the regs specified in the mask. */
4932 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4937 /* Push PR last; this gives better latencies after the prologue, and
4938 candidates for the return delay slot when there are no general
4939 registers pushed. */
4940 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4942 /* If this is an interrupt handler, and the SZ bit varies,
4943 and we have to push any floating point register, we need
4944 to switch to the correct precision first. */
4945 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4946 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4948 HARD_REG_SET unsaved;
4951 COMPL_HARD_REG_SET (unsaved, *mask);
4952 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4956 && (i != FPSCR_REG || ! skip_fpscr)
4957 && TEST_HARD_REG_BIT (*mask, i))
4960 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4964 /* Calculate how much extra space is needed to save all callee-saved
4966 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4969 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4972 int stack_space = 0;
4973 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4975 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4976 if ((! call_used_regs[reg] || interrupt_handler)
4977 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4978 /* Leave space to save this target register on the stack,
4979 in case target register allocation wants to use it. */
4980 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4984 /* Decide whether we should reserve space for callee-save target registers,
4985 in case target register allocation wants to use them. REGS_SAVED is
4986 the space, in bytes, that is already required for register saves.
4987 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4990 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4991 HARD_REG_SET *live_regs_mask)
4995 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4998 /* Decide how much space to reserve for callee-save target registers
4999 in case target register allocation wants to use them.
5000 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5003 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5005 if (shmedia_space_reserved_for_target_registers)
5006 return shmedia_target_regs_stack_space (live_regs_mask);
5011 /* Work out the registers which need to be saved, both as a mask and a
5012 count of saved words. Return the count.
5014 If doing a pragma interrupt function, then push all regs used by the
5015 function, and if we call another function (we can tell by looking at PR),
5016 make sure that all the regs it clobbers are safe too. */
5019 calc_live_regs (HARD_REG_SET *live_regs_mask)
5023 int interrupt_handler;
5024 int pr_live, has_call;
5026 interrupt_handler = sh_cfun_interrupt_handler_p ();
5028 CLEAR_HARD_REG_SET (*live_regs_mask);
5029 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
5030 && regs_ever_live[FPSCR_REG])
5031 target_flags &= ~FPU_SINGLE_BIT;
5032 /* If we can save a lot of saves by switching to double mode, do that. */
5033 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
5034 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5035 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5036 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5039 target_flags &= ~FPU_SINGLE_BIT;
5042 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5043 knows how to use it. That means the pseudo originally allocated for
5044 the initial value can become the PR_MEDIA_REG hard register, as seen for
5045 execute/20010122-1.c:test9. */
5047 /* ??? this function is called from initial_elimination_offset, hence we
5048 can't use the result of sh_media_register_for_return here. */
5049 pr_live = sh_pr_n_sets ();
5052 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5053 pr_live = (pr_initial
5054 ? (GET_CODE (pr_initial) != REG
5055 || REGNO (pr_initial) != (PR_REG))
5056 : regs_ever_live[PR_REG]);
5057 /* For Shcompact, if not optimizing, we end up with a memory reference
5058 using the return address pointer for __builtin_return_address even
5059 though there is no actual need to put the PR register on the stack. */
5060 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5062 /* Force PR to be live if the prologue has to call the SHmedia
5063 argument decoder or register saver. */
5064 if (TARGET_SHCOMPACT
5065 && ((current_function_args_info.call_cookie
5066 & ~ CALL_COOKIE_RET_TRAMP (1))
5067 || current_function_has_nonlocal_label))
5069 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5070 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5072 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5074 : (interrupt_handler && ! pragma_trapa)
5075 ? (/* Need to save all the regs ever live. */
5076 (regs_ever_live[reg]
5077 || (call_used_regs[reg]
5078 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5080 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5081 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5082 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5083 && reg != RETURN_ADDRESS_POINTER_REGNUM
5084 && reg != T_REG && reg != GBR_REG
5085 /* Push fpscr only on targets which have FPU */
5086 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5087 : (/* Only push those regs which are used and need to be saved. */
5090 && current_function_args_info.call_cookie
5091 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5092 || (regs_ever_live[reg] && ! call_used_regs[reg])
5093 || (current_function_calls_eh_return
5094 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5095 || reg == (int) EH_RETURN_DATA_REGNO (1)
5096 || reg == (int) EH_RETURN_DATA_REGNO (2)
5097 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5098 || ((reg == MACL_REG || reg == MACH_REG)
5099 && regs_ever_live[reg]
5100 && sh_cfun_attr_renesas_p ())
5103 SET_HARD_REG_BIT (*live_regs_mask, reg);
5104 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5106 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
5107 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5109 if (FP_REGISTER_P (reg))
5111 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5113 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5114 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5117 else if (XD_REGISTER_P (reg))
5119 /* Must switch to double mode to access these registers. */
5120 target_flags &= ~FPU_SINGLE_BIT;
5125 /* If we have a target register optimization pass after prologue / epilogue
5126 threading, we need to assume all target registers will be live even if
5128 if (flag_branch_target_load_optimize2
5129 && TARGET_SAVE_ALL_TARGET_REGS
5130 && shmedia_space_reserved_for_target_registers)
5131 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5132 if ((! call_used_regs[reg] || interrupt_handler)
5133 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5135 SET_HARD_REG_BIT (*live_regs_mask, reg);
5136 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5138 /* If this is an interrupt handler, we don't have any call-clobbered
5139 registers we can conveniently use for target register save/restore.
5140 Make sure we save at least one general purpose register when we need
5141 to save target registers. */
5142 if (interrupt_handler
5143 && hard_regs_intersect_p (live_regs_mask,
5144 ®_class_contents[TARGET_REGS])
5145 && ! hard_regs_intersect_p (live_regs_mask,
5146 ®_class_contents[GENERAL_REGS]))
5148 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5149 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5155 /* Code to generate prologue and epilogue sequences */
5157 /* PUSHED is the number of bytes that are being pushed on the
5158 stack for register saves. Return the frame size, padded
5159 appropriately so that the stack stays properly aligned. */
5160 static HOST_WIDE_INT
5161 rounded_frame_size (int pushed)
5163 HOST_WIDE_INT size = get_frame_size ();
5164 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5166 return ((size + pushed + align - 1) & -align) - pushed;
5169 /* Choose a call-clobbered target-branch register that remains
5170 unchanged along the whole function. We set it up as the return
5171 value in the prologue. */
5173 sh_media_register_for_return (void)
5178 if (! current_function_is_leaf)
5180 if (lookup_attribute ("interrupt_handler",
5181 DECL_ATTRIBUTES (current_function_decl)))
5184 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5186 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5187 if (call_used_regs[regno] && ! regs_ever_live[regno])
5193 /* The maximum registers we need to save are:
5194 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5195 - 32 floating point registers (for each pair, we save none,
5196 one single precision value, or a double precision value).
5197 - 8 target registers
5198 - add 1 entry for a delimiter. */
5199 #define MAX_SAVED_REGS (62+32+8)
5201 typedef struct save_entry_s
5210 /* There will be a delimiter entry with VOIDmode both at the start and the
5211 end of a filled in schedule. The end delimiter has the offset of the
5212 save with the smallest (i.e. most negative) offset. */
5213 typedef struct save_schedule_s
5215 save_entry entries[MAX_SAVED_REGS + 2];
5216 int temps[MAX_TEMPS+1];
5219 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5220 use reverse order. Returns the last entry written to (not counting
5221 the delimiter). OFFSET_BASE is a number to be added to all offset
5225 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5229 save_entry *entry = schedule->entries;
5233 if (! current_function_interrupt)
5234 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5235 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5236 && ! FUNCTION_ARG_REGNO_P (i)
5237 && i != FIRST_RET_REG
5238 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5239 && ! (current_function_calls_eh_return
5240 && (i == EH_RETURN_STACKADJ_REGNO
5241 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5242 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5243 schedule->temps[tmpx++] = i;
5245 entry->mode = VOIDmode;
5246 entry->offset = offset_base;
5248 /* We loop twice: first, we save 8-byte aligned registers in the
5249 higher addresses, that are known to be aligned. Then, we
5250 proceed to saving 32-bit registers that don't need 8-byte
5252 If this is an interrupt function, all registers that need saving
5253 need to be saved in full. moreover, we need to postpone saving
5254 target registers till we have saved some general purpose registers
5255 we can then use as scratch registers. */
5256 offset = offset_base;
5257 for (align = 1; align >= 0; align--)
5259 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5260 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5262 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5265 if (current_function_interrupt)
5267 if (TARGET_REGISTER_P (i))
5269 if (GENERAL_REGISTER_P (i))
5272 if (mode == SFmode && (i % 2) == 1
5273 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5274 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5281 /* If we're doing the aligned pass and this is not aligned,
5282 or we're doing the unaligned pass and this is aligned,
5284 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5288 if (current_function_interrupt
5289 && GENERAL_REGISTER_P (i)
5290 && tmpx < MAX_TEMPS)
5291 schedule->temps[tmpx++] = i;
5293 offset -= GET_MODE_SIZE (mode);
5296 entry->offset = offset;
5299 if (align && current_function_interrupt)
5300 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5301 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5303 offset -= GET_MODE_SIZE (DImode);
5305 entry->mode = DImode;
5306 entry->offset = offset;
5311 entry->mode = VOIDmode;
5312 entry->offset = offset;
5313 schedule->temps[tmpx] = -1;
5318 sh_expand_prologue (void)
5320 HARD_REG_SET live_regs_mask;
5323 int save_flags = target_flags;
5325 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5327 /* We have pretend args if we had an object sent partially in registers
5328 and partially on the stack, e.g. a large structure. */
5329 output_stack_adjust (-current_function_pretend_args_size
5330 - current_function_args_info.stack_regs * 8,
5331 stack_pointer_rtx, 0, NULL);
5335 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5336 /* We're going to use the PIC register to load the address of the
5337 incoming-argument decoder and/or of the return trampoline from
5338 the GOT, so make sure the PIC register is preserved and
5340 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5342 if (TARGET_SHCOMPACT
5343 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5347 /* First, make all registers with incoming arguments that will
5348 be pushed onto the stack live, so that register renaming
5349 doesn't overwrite them. */
5350 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5351 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5352 >= NPARM_REGS (SImode) - reg)
5353 for (; reg < NPARM_REGS (SImode); reg++)
5354 emit_insn (gen_shcompact_preserve_incoming_args
5355 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5356 else if (CALL_COOKIE_INT_REG_GET
5357 (current_function_args_info.call_cookie, reg) == 1)
5358 emit_insn (gen_shcompact_preserve_incoming_args
5359 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5361 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5363 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5364 GEN_INT (current_function_args_info.call_cookie));
5365 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5366 gen_rtx_REG (SImode, R0_REG));
5368 else if (TARGET_SHMEDIA)
5370 int tr = sh_media_register_for_return ();
5374 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5375 gen_rtx_REG (DImode, PR_MEDIA_REG));
5377 /* ??? We should suppress saving pr when we don't need it, but this
5378 is tricky because of builtin_return_address. */
5380 /* If this function only exits with sibcalls, this copy
5381 will be flagged as dead. */
5382 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5388 /* Emit the code for SETUP_VARARGS. */
5389 if (current_function_stdarg)
5391 /* This is not used by the SH2E calling convention */
5392 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5
5393 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
5395 /* Push arg regs as if they'd been provided by caller in stack. */
5396 for (i = 0; i < NPARM_REGS(SImode); i++)
5398 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5401 if (i >= (NPARM_REGS(SImode)
5402 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5406 RTX_FRAME_RELATED_P (insn) = 0;
5412 /* If we're supposed to switch stacks at function entry, do so now. */
5414 emit_insn (gen_sp_switch_1 ());
5416 d = calc_live_regs (&live_regs_mask);
5417 /* ??? Maybe we could save some switching if we can move a mode switch
5418 that already happens to be at the function start into the prologue. */
5419 if (target_flags != save_flags && ! current_function_interrupt)
5420 emit_insn (gen_toggle_sz ());
5424 int offset_base, offset;
5426 int offset_in_r0 = -1;
5428 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5429 int total_size, save_size;
5430 save_schedule schedule;
5434 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5435 && ! current_function_interrupt)
5436 r0 = gen_rtx_REG (Pmode, R0_REG);
5438 /* D is the actual number of bytes that we need for saving registers,
5439 however, in initial_elimination_offset we have committed to using
5440 an additional TREGS_SPACE amount of bytes - in order to keep both
5441 addresses to arguments supplied by the caller and local variables
5442 valid, we must keep this gap. Place it between the incoming
5443 arguments and the actually saved registers in a bid to optimize
5444 locality of reference. */
5445 total_size = d + tregs_space;
5446 total_size += rounded_frame_size (total_size);
5447 save_size = total_size - rounded_frame_size (d);
5448 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5449 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5450 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5452 /* If adjusting the stack in a single step costs nothing extra, do so.
5453 I.e. either if a single addi is enough, or we need a movi anyway,
5454 and we don't exceed the maximum offset range (the test for the
5455 latter is conservative for simplicity). */
5457 && (CONST_OK_FOR_I10 (-total_size)
5458 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5459 && total_size <= 2044)))
5460 d_rounding = total_size - save_size;
5462 offset_base = d + d_rounding;
5464 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5467 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5468 tmp_pnt = schedule.temps;
5469 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5471 enum machine_mode mode = entry->mode;
5472 int reg = entry->reg;
5473 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5475 offset = entry->offset;
5477 reg_rtx = gen_rtx_REG (mode, reg);
5479 mem_rtx = gen_rtx_MEM (mode,
5480 gen_rtx_PLUS (Pmode,
5484 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5492 if (HAVE_PRE_DECREMENT
5493 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5494 || mem_rtx == NULL_RTX
5495 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5497 pre_dec = gen_rtx_MEM (mode,
5498 gen_rtx_PRE_DEC (Pmode, r0));
5500 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5509 offset += GET_MODE_SIZE (mode);
5513 if (mem_rtx != NULL_RTX)
5516 if (offset_in_r0 == -1)
5518 emit_move_insn (r0, GEN_INT (offset));
5519 offset_in_r0 = offset;
5521 else if (offset != offset_in_r0)
5526 GEN_INT (offset - offset_in_r0)));
5527 offset_in_r0 += offset - offset_in_r0;
5530 if (pre_dec != NULL_RTX)
5536 (Pmode, r0, stack_pointer_rtx));
5540 offset -= GET_MODE_SIZE (mode);
5541 offset_in_r0 -= GET_MODE_SIZE (mode);
5546 mem_rtx = gen_rtx_MEM (mode, r0);
5548 mem_rtx = gen_rtx_MEM (mode,
5549 gen_rtx_PLUS (Pmode,
5553 /* We must not use an r0-based address for target-branch
5554 registers or for special registers without pre-dec
5555 memory addresses, since we store their values in r0
5557 if (TARGET_REGISTER_P (reg)
5558 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5559 && mem_rtx != pre_dec))
5563 if (TARGET_REGISTER_P (reg)
5564 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5565 && mem_rtx != pre_dec))
5567 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5569 emit_move_insn (tmp_reg, reg_rtx);
5571 if (REGNO (tmp_reg) == R0_REG)
5575 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5579 if (*++tmp_pnt <= 0)
5580 tmp_pnt = schedule.temps;
5587 /* Mark as interesting for dwarf cfi generator */
5588 insn = emit_move_insn (mem_rtx, reg_rtx);
5589 RTX_FRAME_RELATED_P (insn) = 1;
5591 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5593 rtx reg_rtx = gen_rtx_REG (mode, reg);
5595 rtx mem_rtx = gen_rtx_MEM (mode,
5596 gen_rtx_PLUS (Pmode,
5600 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5601 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5603 REG_NOTES (insn) = note_rtx;
5608 if (entry->offset != d_rounding)
5612 push_regs (&live_regs_mask, current_function_interrupt);
5614 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5616 rtx insn = get_last_insn ();
5617 rtx last = emit_insn (gen_GOTaddr2picreg ());
5619 /* Mark these insns as possibly dead. Sometimes, flow2 may
5620 delete all uses of the PIC register. In this case, let it
5621 delete the initialization too. */
5624 insn = NEXT_INSN (insn);
5626 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5630 while (insn != last);
5633 if (SHMEDIA_REGS_STACK_ADJUST ())
5635 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5636 function_symbol (TARGET_FPU_ANY
5637 ? "__GCC_push_shmedia_regs"
5638 : "__GCC_push_shmedia_regs_nofpu"));
5639 /* This must NOT go through the PLT, otherwise mach and macl
5640 may be clobbered. */
5641 emit_insn (gen_shmedia_save_restore_regs_compact
5642 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5645 if (target_flags != save_flags && ! current_function_interrupt)
5647 rtx insn = emit_insn (gen_toggle_sz ());
5649 /* If we're lucky, a mode switch in the function body will
5650 overwrite fpscr, turning this insn dead. Tell flow this
5651 insn is ok to delete. */
5652 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5657 target_flags = save_flags;
5659 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5660 stack_pointer_rtx, 0, NULL);
5662 if (frame_pointer_needed)
5663 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5665 if (TARGET_SHCOMPACT
5666 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5668 /* This must NOT go through the PLT, otherwise mach and macl
5669 may be clobbered. */
5670 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5671 function_symbol ("__GCC_shcompact_incoming_args"));
5672 emit_insn (gen_shcompact_incoming_args ());
5677 sh_expand_epilogue (bool sibcall_p)
5679 HARD_REG_SET live_regs_mask;
5683 int save_flags = target_flags;
5684 int frame_size, save_size;
5685 int fpscr_deferred = 0;
5686 int e = sibcall_p ? -1 : 1;
5688 d = calc_live_regs (&live_regs_mask);
5691 frame_size = rounded_frame_size (d);
5695 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5697 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5698 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5699 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5701 total_size = d + tregs_space;
5702 total_size += rounded_frame_size (total_size);
5703 save_size = total_size - frame_size;
5705 /* If adjusting the stack in a single step costs nothing extra, do so.
5706 I.e. either if a single addi is enough, or we need a movi anyway,
5707 and we don't exceed the maximum offset range (the test for the
5708 latter is conservative for simplicity). */
5710 && ! frame_pointer_needed
5711 && (CONST_OK_FOR_I10 (total_size)
5712 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5713 && total_size <= 2044)))
5714 d_rounding = frame_size;
5716 frame_size -= d_rounding;
5719 if (frame_pointer_needed)
5721 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5723 /* We must avoid moving the stack pointer adjustment past code
5724 which reads from the local frame, else an interrupt could
5725 occur after the SP adjustment and clobber data in the local
5727 emit_insn (gen_blockage ());
5728 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5730 else if (frame_size)
5732 /* We must avoid moving the stack pointer adjustment past code
5733 which reads from the local frame, else an interrupt could
5734 occur after the SP adjustment and clobber data in the local
5736 emit_insn (gen_blockage ());
5737 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5740 if (SHMEDIA_REGS_STACK_ADJUST ())
5742 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5743 function_symbol (TARGET_FPU_ANY
5744 ? "__GCC_pop_shmedia_regs"
5745 : "__GCC_pop_shmedia_regs_nofpu"));
5746 /* This must NOT go through the PLT, otherwise mach and macl
5747 may be clobbered. */
5748 emit_insn (gen_shmedia_save_restore_regs_compact
5749 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5752 /* Pop all the registers. */
5754 if (target_flags != save_flags && ! current_function_interrupt)
5755 emit_insn (gen_toggle_sz ());
5758 int offset_base, offset;
5759 int offset_in_r0 = -1;
5761 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5762 save_schedule schedule;
5766 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5767 offset_base = -entry[1].offset + d_rounding;
5768 tmp_pnt = schedule.temps;
5769 for (; entry->mode != VOIDmode; entry--)
5771 enum machine_mode mode = entry->mode;
5772 int reg = entry->reg;
5773 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5775 offset = offset_base + entry->offset;
5776 reg_rtx = gen_rtx_REG (mode, reg);
5778 mem_rtx = gen_rtx_MEM (mode,
5779 gen_rtx_PLUS (Pmode,
5783 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5789 if (HAVE_POST_INCREMENT
5790 && (offset == offset_in_r0
5791 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5792 && mem_rtx == NULL_RTX)
5793 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5795 post_inc = gen_rtx_MEM (mode,
5796 gen_rtx_POST_INC (Pmode, r0));
5798 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5801 post_inc = NULL_RTX;
5810 if (mem_rtx != NULL_RTX)
5813 if (offset_in_r0 == -1)
5815 emit_move_insn (r0, GEN_INT (offset));
5816 offset_in_r0 = offset;
5818 else if (offset != offset_in_r0)
5823 GEN_INT (offset - offset_in_r0)));
5824 offset_in_r0 += offset - offset_in_r0;
5827 if (post_inc != NULL_RTX)
5833 (Pmode, r0, stack_pointer_rtx));
5839 offset_in_r0 += GET_MODE_SIZE (mode);
5842 mem_rtx = gen_rtx_MEM (mode, r0);
5844 mem_rtx = gen_rtx_MEM (mode,
5845 gen_rtx_PLUS (Pmode,
5849 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5850 && mem_rtx != post_inc)
5854 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5855 && mem_rtx != post_inc)
5857 insn = emit_move_insn (r0, mem_rtx);
5860 else if (TARGET_REGISTER_P (reg))
5862 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5864 /* Give the scheduler a bit of freedom by using up to
5865 MAX_TEMPS registers in a round-robin fashion. */
5866 insn = emit_move_insn (tmp_reg, mem_rtx);
5869 tmp_pnt = schedule.temps;
5872 insn = emit_move_insn (reg_rtx, mem_rtx);
5873 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5874 /* This is dead, unless we return with a sibcall. */
5875 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5880 if (entry->offset + offset_base != d + d_rounding)
5883 else /* ! TARGET_SH5 */
5886 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5888 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5890 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5892 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5893 && hard_regs_intersect_p (&live_regs_mask,
5894 ®_class_contents[DF_REGS]))
5896 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5898 if (j == FIRST_FP_REG && fpscr_deferred)
5903 if (target_flags != save_flags && ! current_function_interrupt)
5904 emit_insn (gen_toggle_sz ());
5905 target_flags = save_flags;
5907 output_stack_adjust (extra_push + current_function_pretend_args_size
5908 + save_size + d_rounding
5909 + current_function_args_info.stack_regs * 8,
5910 stack_pointer_rtx, e, NULL);
5912 if (current_function_calls_eh_return)
5913 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5914 EH_RETURN_STACKADJ_RTX));
5916 /* Switch back to the normal stack if necessary. */
5918 emit_insn (gen_sp_switch_2 ());
5920 /* Tell flow the insn that pops PR isn't dead. */
5921 /* PR_REG will never be live in SHmedia mode, and we don't need to
5922 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5923 by the return pattern. */
5924 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5925 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5928 static int sh_need_epilogue_known = 0;
5931 sh_need_epilogue (void)
5933 if (! sh_need_epilogue_known)
5938 sh_expand_epilogue (0);
5939 epilogue = get_insns ();
5941 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5943 return sh_need_epilogue_known > 0;
5946 /* Emit code to change the current function's return address to RA.
5947 TEMP is available as a scratch register, if needed. */
5950 sh_set_return_address (rtx ra, rtx tmp)
5952 HARD_REG_SET live_regs_mask;
5954 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5957 d = calc_live_regs (&live_regs_mask);
5959 /* If pr_reg isn't life, we can set it (or the register given in
5960 sh_media_register_for_return) directly. */
5961 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5967 int rr_regno = sh_media_register_for_return ();
5972 rr = gen_rtx_REG (DImode, rr_regno);
5975 rr = gen_rtx_REG (SImode, pr_reg);
5977 emit_insn (GEN_MOV (rr, ra));
5978 /* Tell flow the register for return isn't dead. */
5979 emit_insn (gen_rtx_USE (VOIDmode, rr));
5986 save_schedule schedule;
5989 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5990 offset = entry[1].offset;
5991 for (; entry->mode != VOIDmode; entry--)
5992 if (entry->reg == pr_reg)
5995 /* We can't find pr register. */
5999 offset = entry->offset - offset;
6000 pr_offset = (rounded_frame_size (d) + offset
6001 + SHMEDIA_REGS_STACK_ADJUST ());
6004 pr_offset = rounded_frame_size (d);
6006 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6007 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6009 tmp = gen_rtx_MEM (Pmode, tmp);
6010 emit_insn (GEN_MOV (tmp, ra));
6013 /* Clear variables at function end. */
6016 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6017 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6019 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6020 sh_need_epilogue_known = 0;
6021 sp_switch = NULL_RTX;
6025 sh_builtin_saveregs (void)
6027 /* First unnamed integer register. */
6028 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6029 /* Number of integer registers we need to save. */
6030 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6031 /* First unnamed SFmode float reg */
6032 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6033 /* Number of SFmode float regs to save. */
6034 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6037 HOST_WIDE_INT alias_set;
6043 int pushregs = n_intregs;
6045 while (pushregs < NPARM_REGS (SImode) - 1
6046 && (CALL_COOKIE_INT_REG_GET
6047 (current_function_args_info.call_cookie,
6048 NPARM_REGS (SImode) - pushregs)
6051 current_function_args_info.call_cookie
6052 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6057 if (pushregs == NPARM_REGS (SImode))
6058 current_function_args_info.call_cookie
6059 |= (CALL_COOKIE_INT_REG (0, 1)
6060 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6062 current_function_args_info.call_cookie
6063 |= CALL_COOKIE_STACKSEQ (pushregs);
6065 current_function_pretend_args_size += 8 * n_intregs;
6067 if (TARGET_SHCOMPACT)
6071 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6073 error ("__builtin_saveregs not supported by this subtarget");
6080 /* Allocate block of memory for the regs. */
6081 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6082 Or can assign_stack_local accept a 0 SIZE argument? */
6083 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6086 regbuf = gen_rtx_MEM (BLKmode,
6087 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6088 else if (n_floatregs & 1)
6092 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6093 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6094 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6095 regbuf = change_address (regbuf, BLKmode, addr);
6098 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6099 alias_set = get_varargs_alias_set ();
6100 set_mem_alias_set (regbuf, alias_set);
6103 This is optimized to only save the regs that are necessary. Explicitly
6104 named args need not be saved. */
6106 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6107 adjust_address (regbuf, BLKmode,
6108 n_floatregs * UNITS_PER_WORD),
6112 /* Return the address of the regbuf. */
6113 return XEXP (regbuf, 0);
6116 This is optimized to only save the regs that are necessary. Explicitly
6117 named args need not be saved.
6118 We explicitly build a pointer to the buffer because it halves the insn
6119 count when not optimizing (otherwise the pointer is built for each reg
6121 We emit the moves in reverse order so that we can use predecrement. */
6123 fpregs = gen_reg_rtx (Pmode);
6124 emit_move_insn (fpregs, XEXP (regbuf, 0));
6125 emit_insn (gen_addsi3 (fpregs, fpregs,
6126 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6130 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6132 emit_insn (gen_addsi3 (fpregs, fpregs,
6133 GEN_INT (-2 * UNITS_PER_WORD)));
6134 mem = gen_rtx_MEM (DFmode, fpregs);
6135 set_mem_alias_set (mem, alias_set);
6136 emit_move_insn (mem,
6137 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6139 regno = first_floatreg;
6142 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6143 mem = gen_rtx_MEM (SFmode, fpregs);
6144 set_mem_alias_set (mem, alias_set);
6145 emit_move_insn (mem,
6146 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6147 - (TARGET_LITTLE_ENDIAN != 0)));
6151 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6155 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6156 mem = gen_rtx_MEM (SFmode, fpregs);
6157 set_mem_alias_set (mem, alias_set);
6158 emit_move_insn (mem,
6159 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6162 /* Return the address of the regbuf. */
6163 return XEXP (regbuf, 0);
6166 /* Define the `__builtin_va_list' type for the ABI. */
6169 sh_build_builtin_va_list (void)
6171 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6174 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6175 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6176 return ptr_type_node;
6178 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6180 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6182 f_next_o_limit = build_decl (FIELD_DECL,
6183 get_identifier ("__va_next_o_limit"),
6185 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6187 f_next_fp_limit = build_decl (FIELD_DECL,
6188 get_identifier ("__va_next_fp_limit"),
6190 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6193 DECL_FIELD_CONTEXT (f_next_o) = record;
6194 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6195 DECL_FIELD_CONTEXT (f_next_fp) = record;
6196 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6197 DECL_FIELD_CONTEXT (f_next_stack) = record;
6199 TYPE_FIELDS (record) = f_next_o;
6200 TREE_CHAIN (f_next_o) = f_next_o_limit;
6201 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6202 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6203 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6205 layout_type (record);
6210 /* Implement `va_start' for varargs and stdarg. */
6213 sh_va_start (tree valist, rtx nextarg)
6215 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6216 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6222 expand_builtin_saveregs ();
6223 std_expand_builtin_va_start (valist, nextarg);
6227 if ((! TARGET_SH2E && ! TARGET_SH4)
6228 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6230 std_expand_builtin_va_start (valist, nextarg);
6234 f_next_o = TYPE_FIELDS (va_list_type_node);
6235 f_next_o_limit = TREE_CHAIN (f_next_o);
6236 f_next_fp = TREE_CHAIN (f_next_o_limit);
6237 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6238 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6240 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6241 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6242 valist, f_next_o_limit);
6243 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
6244 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6245 valist, f_next_fp_limit);
6246 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6247 valist, f_next_stack);
6249 /* Call __builtin_saveregs. */
6250 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6251 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6252 TREE_SIDE_EFFECTS (t) = 1;
6253 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6255 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6260 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6261 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6262 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6263 TREE_SIDE_EFFECTS (t) = 1;
6264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6266 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6267 TREE_SIDE_EFFECTS (t) = 1;
6268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6270 nint = current_function_args_info.arg_count[SH_ARG_INT];
6275 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6276 build_int_2 (UNITS_PER_WORD * nint, 0)));
6277 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6278 TREE_SIDE_EFFECTS (t) = 1;
6279 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6281 u = make_tree (ptr_type_node, nextarg);
6282 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6283 TREE_SIDE_EFFECTS (t) = 1;
6284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6287 /* Implement `va_arg'. */
6290 sh_va_arg (tree valist, tree type)
6292 HOST_WIDE_INT size, rsize;
6293 tree tmp, pptr_type_node;
6295 rtx result_ptr, result = NULL_RTX;
6296 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6299 size = int_size_in_bytes (type);
6300 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6301 pptr_type_node = build_pointer_type (ptr_type_node);
6304 type = build_pointer_type (type);
6306 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6307 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6309 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6310 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6314 f_next_o = TYPE_FIELDS (va_list_type_node);
6315 f_next_o_limit = TREE_CHAIN (f_next_o);
6316 f_next_fp = TREE_CHAIN (f_next_o_limit);
6317 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6318 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6320 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6321 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6322 valist, f_next_o_limit);
6323 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6325 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6326 valist, f_next_fp_limit);
6327 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6328 valist, f_next_stack);
6330 /* Structures with a single member with a distinct mode are passed
6331 like their member. This is relevant if the latter has a REAL_TYPE
6332 or COMPLEX_TYPE type. */
6333 if (TREE_CODE (type) == RECORD_TYPE
6334 && TYPE_FIELDS (type)
6335 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6336 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6337 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6338 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6339 type = TREE_TYPE (TYPE_FIELDS (type));
6342 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6343 || (TREE_CODE (type) == COMPLEX_TYPE
6344 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6349 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6352 addr_rtx = gen_reg_rtx (Pmode);
6353 lab_false = gen_label_rtx ();
6354 lab_over = gen_label_rtx ();
6356 tmp = make_tree (pptr_type_node, addr_rtx);
6357 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6362 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6363 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6365 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6367 expand_expr (next_fp_limit, NULL_RTX,
6368 Pmode, EXPAND_NORMAL),
6369 GE, const1_rtx, Pmode, 1, lab_false);
6371 if (TYPE_ALIGN (type) > BITS_PER_WORD
6372 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6373 && (n_floatregs & 1)))
6375 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6376 build_int_2 (UNITS_PER_WORD, 0));
6377 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6378 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6379 TREE_SIDE_EFFECTS (tmp) = 1;
6380 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6383 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6384 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6386 emit_move_insn (addr_rtx, r);
6388 #ifdef FUNCTION_ARG_SCmode_WART
6389 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6391 rtx addr, real, imag, result_value, slot;
6392 tree subtype = TREE_TYPE (type);
6394 addr = std_expand_builtin_va_arg (valist, subtype);
6395 #ifdef POINTERS_EXTEND_UNSIGNED
6396 if (GET_MODE (addr) != Pmode)
6397 addr = convert_memory_address (Pmode, addr);
6399 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6400 set_mem_alias_set (imag, get_varargs_alias_set ());
6402 addr = std_expand_builtin_va_arg (valist, subtype);
6403 #ifdef POINTERS_EXTEND_UNSIGNED
6404 if (GET_MODE (addr) != Pmode)
6405 addr = convert_memory_address (Pmode, addr);
6407 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6408 set_mem_alias_set (real, get_varargs_alias_set ());
6410 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6411 /* ??? this interface is stupid - why require a pointer? */
6412 result = gen_reg_rtx (Pmode);
6413 slot = assign_stack_temp (SCmode, 8, 0);
6414 emit_move_insn (slot, result_value);
6415 emit_move_insn (result, XEXP (slot, 0));
6417 #endif /* FUNCTION_ARG_SCmode_WART */
6419 emit_jump_insn (gen_jump (lab_over));
6421 emit_label (lab_false);
6423 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6424 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6426 emit_move_insn (addr_rtx, r);
6430 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6431 build_int_2 (rsize, 0));
6433 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6435 expand_expr (next_o_limit, NULL_RTX,
6436 Pmode, EXPAND_NORMAL),
6437 GT, const1_rtx, Pmode, 1, lab_false);
6439 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6440 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6442 emit_move_insn (addr_rtx, r);
6444 emit_jump_insn (gen_jump (lab_over));
6446 emit_label (lab_false);
6448 if (size > 4 && ! TARGET_SH4)
6450 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6451 TREE_SIDE_EFFECTS (tmp) = 1;
6452 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6455 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6456 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6458 emit_move_insn (addr_rtx, r);
6462 emit_label (lab_over);
6465 /* ??? In va-sh.h, there had been code to make values larger than
6466 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6468 result_ptr = std_expand_builtin_va_arg (valist, type);
6471 emit_move_insn (result, result_ptr);
6472 emit_label (lab_over);
6475 result = result_ptr;
6479 #ifdef POINTERS_EXTEND_UNSIGNED
6480 if (GET_MODE (addr) != Pmode)
6481 addr = convert_memory_address (Pmode, result);
6483 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6484 set_mem_alias_set (result, get_varargs_alias_set ());
6486 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6487 argument to the varargs alias set. */
6492 sh_promote_prototypes (tree type)
6498 return ! sh_attr_renesas_p (type);
6501 /* Define where to put the arguments to a function.
6502 Value is zero to push the argument on the stack,
6503 or a hard register in which to store the argument.
6505 MODE is the argument's machine mode.
6506 TYPE is the data type of the argument (as a tree).
6507 This is null for libcalls where that information may
6509 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6510 the preceding args and about the function being called.
6511 NAMED is nonzero if this argument is a named parameter
6512 (otherwise it is an extra parameter matching an ellipsis).
6514 On SH the first args are normally in registers
6515 and the rest are pushed. Any arg that starts within the first
6516 NPARM_REGS words is at least partially passed in a register unless
6517 its data type forbids. */
6521 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6522 tree type, int named)
6524 if (! TARGET_SH5 && mode == VOIDmode)
6525 return GEN_INT (ca->renesas_abi ? 1 : 0);
6528 && PASS_IN_REG_P (*ca, mode, type)
6529 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6533 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6534 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6536 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6537 gen_rtx_REG (SFmode,
6539 + (ROUND_REG (*ca, mode) ^ 1)),
6541 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6542 gen_rtx_REG (SFmode,
6544 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6546 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6549 /* If the alignment of a DF value causes an SF register to be
6550 skipped, we will use that skipped register for the next SF
6552 if ((TARGET_HITACHI || ca->renesas_abi)
6553 && ca->free_single_fp_reg
6555 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6557 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6558 ^ (mode == SFmode && TARGET_SH4
6559 && TARGET_LITTLE_ENDIAN != 0
6560 && ! TARGET_HITACHI && ! ca->renesas_abi);
6561 return gen_rtx_REG (mode, regno);
6567 if (mode == VOIDmode && TARGET_SHCOMPACT)
6568 return GEN_INT (ca->call_cookie);
6570 /* The following test assumes unnamed arguments are promoted to
6572 if (mode == SFmode && ca->free_single_fp_reg)
6573 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6575 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6576 && (named || ! ca->prototype_p)
6577 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6579 if (! ca->prototype_p && TARGET_SHMEDIA)
6580 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6582 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6584 + ca->arg_count[(int) SH_ARG_FLOAT]);
6587 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6588 && (! TARGET_SHCOMPACT
6589 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6590 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6593 return gen_rtx_REG (mode, (FIRST_PARM_REG
6594 + ca->arg_count[(int) SH_ARG_INT]));
6603 /* Update the data in CUM to advance over an argument
6604 of mode MODE and data type TYPE.
6605 (TYPE is null for libcalls where that information may not be
6609 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6610 tree type, int named)
6614 else if (TARGET_SH5)
6616 tree type2 = (ca->byref && type
6619 enum machine_mode mode2 = (ca->byref && type
6622 int dwords = ((ca->byref
6625 ? int_size_in_bytes (type2)
6626 : GET_MODE_SIZE (mode2)) + 7) / 8;
6627 int numregs = MIN (dwords, NPARM_REGS (SImode)
6628 - ca->arg_count[(int) SH_ARG_INT]);
6632 ca->arg_count[(int) SH_ARG_INT] += numregs;
6633 if (TARGET_SHCOMPACT
6634 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6637 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6639 /* N.B. We want this also for outgoing. */
6640 ca->stack_regs += numregs;
6645 ca->stack_regs += numregs;
6646 ca->byref_regs += numregs;
6650 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6654 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6657 else if (dwords > numregs)
6659 int pushregs = numregs;
6661 if (TARGET_SHCOMPACT)
6662 ca->stack_regs += numregs;
6663 while (pushregs < NPARM_REGS (SImode) - 1
6664 && (CALL_COOKIE_INT_REG_GET
6666 NPARM_REGS (SImode) - pushregs)
6670 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6674 if (numregs == NPARM_REGS (SImode))
6676 |= CALL_COOKIE_INT_REG (0, 1)
6677 | CALL_COOKIE_STACKSEQ (numregs - 1);
6680 |= CALL_COOKIE_STACKSEQ (numregs);
6683 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6684 && (named || ! ca->prototype_p))
6686 if (mode2 == SFmode && ca->free_single_fp_reg)
6687 ca->free_single_fp_reg = 0;
6688 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6689 < NPARM_REGS (SFmode))
6692 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6694 - ca->arg_count[(int) SH_ARG_FLOAT]);
6696 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6698 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6700 if (ca->outgoing && numregs > 0)
6704 |= (CALL_COOKIE_INT_REG
6705 (ca->arg_count[(int) SH_ARG_INT]
6706 - numregs + ((numfpregs - 2) / 2),
6707 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6710 while (numfpregs -= 2);
6712 else if (mode2 == SFmode && (named)
6713 && (ca->arg_count[(int) SH_ARG_FLOAT]
6714 < NPARM_REGS (SFmode)))
6715 ca->free_single_fp_reg
6716 = FIRST_FP_PARM_REG - numfpregs
6717 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6723 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6725 /* Note that we've used the skipped register. */
6726 if (mode == SFmode && ca->free_single_fp_reg)
6728 ca->free_single_fp_reg = 0;
6731 /* When we have a DF after an SF, there's an SF register that get
6732 skipped in order to align the DF value. We note this skipped
6733 register, because the next SF value will use it, and not the
6734 SF that follows the DF. */
6736 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6738 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6739 + BASE_ARG_REG (mode));
6743 if (! (TARGET_SH4 || ca->renesas_abi)
6744 || PASS_IN_REG_P (*ca, mode, type))
6745 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6746 = (ROUND_REG (*ca, mode)
6748 ? ROUND_ADVANCE (int_size_in_bytes (type))
6749 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6752 /* The Renesas calling convention doesn't quite fit into this scheme since
6753 the address is passed like an invisible argument, but one that is always
6754 passed in memory. */
6756 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6758 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6760 return gen_rtx_REG (Pmode, 2);
6763 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6766 sh_return_in_memory (tree type, tree fndecl)
6770 if (TYPE_MODE (type) == BLKmode)
6771 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6773 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6777 return (TYPE_MODE (type) == BLKmode
6778 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6779 && TREE_CODE (type) == RECORD_TYPE));
6783 /* We actually emit the code in sh_expand_prologue. We used to use
6784 a static variable to flag that we need to emit this code, but that
6785 doesn't when inlining, when functions are deferred and then emitted
6786 later. Fortunately, we already have two flags that are part of struct
6787 function that tell if a function uses varargs or stdarg. */
6789 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
6790 enum machine_mode mode ATTRIBUTE_UNUSED,
6791 tree type ATTRIBUTE_UNUSED,
6792 int *pretend_arg_size ATTRIBUTE_UNUSED,
6793 int second_time ATTRIBUTE_UNUSED)
6795 if (! current_function_stdarg)
6800 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6806 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6808 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6812 /* Define the offset between two registers, one to be eliminated, and
6813 the other its replacement, at the start of a routine. */
6816 initial_elimination_offset (int from, int to)
6819 int regs_saved_rounding = 0;
6820 int total_saved_regs_space;
6821 int total_auto_space;
6822 int save_flags = target_flags;
6824 HARD_REG_SET live_regs_mask;
6826 shmedia_space_reserved_for_target_registers = false;
6827 regs_saved = calc_live_regs (&live_regs_mask);
6828 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6830 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6832 shmedia_space_reserved_for_target_registers = true;
6833 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6836 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6837 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6838 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6840 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6841 copy_flags = target_flags;
6842 target_flags = save_flags;
6844 total_saved_regs_space = regs_saved + regs_saved_rounding;
6846 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6847 return total_saved_regs_space + total_auto_space
6848 + current_function_args_info.byref_regs * 8;
6850 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6851 return total_saved_regs_space + total_auto_space
6852 + current_function_args_info.byref_regs * 8;
6854 /* Initial gap between fp and sp is 0. */
6855 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6858 if (from == RETURN_ADDRESS_POINTER_REGNUM
6859 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6863 int n = total_saved_regs_space;
6864 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6865 save_schedule schedule;
6868 n += total_auto_space;
6870 /* If it wasn't saved, there's not much we can do. */
6871 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6874 target_flags = copy_flags;
6876 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6877 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6878 if (entry->reg == pr_reg)
6880 target_flags = save_flags;
6881 return entry->offset;
6886 return total_auto_space;
6892 /* Handle machine specific pragmas to be semi-compatible with Renesas
6896 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6898 pragma_interrupt = 1;
6902 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6904 pragma_interrupt = pragma_trapa = 1;
6908 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6910 pragma_nosave_low_regs = 1;
6913 /* Generate 'handle_interrupt' attribute for decls */
6916 sh_insert_attributes (tree node, tree *attributes)
6918 if (! pragma_interrupt
6919 || TREE_CODE (node) != FUNCTION_DECL)
6922 /* We are only interested in fields. */
6923 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6926 /* Add a 'handle_interrupt' attribute. */
6927 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6932 /* Supported attributes:
6934 interrupt_handler -- specifies this function is an interrupt handler.
6936 sp_switch -- specifies an alternate stack for an interrupt handler
6939 trap_exit -- use a trapa to exit an interrupt function instead of
6942 renesas -- use Renesas calling/layout conventions (functions and
6947 const struct attribute_spec sh_attribute_table[] =
6949 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6950 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6951 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6952 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6953 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6954 { NULL, 0, 0, false, false, false, NULL }
6957 /* Handle an "interrupt_handler" attribute; arguments as in
6958 struct attribute_spec.handler. */
6960 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6961 tree args ATTRIBUTE_UNUSED,
6962 int flags ATTRIBUTE_UNUSED,
6965 if (TREE_CODE (*node) != FUNCTION_DECL)
6967 warning ("`%s' attribute only applies to functions",
6968 IDENTIFIER_POINTER (name));
6969 *no_add_attrs = true;
6971 else if (TARGET_SHCOMPACT)
6973 error ("attribute interrupt_handler is not compatible with -m5-compact");
6974 *no_add_attrs = true;
6980 /* Handle an "sp_switch" attribute; arguments as in
6981 struct attribute_spec.handler. */
6983 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6984 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6986 if (TREE_CODE (*node) != FUNCTION_DECL)
6988 warning ("`%s' attribute only applies to functions",
6989 IDENTIFIER_POINTER (name));
6990 *no_add_attrs = true;
6992 else if (!pragma_interrupt)
6994 /* The sp_switch attribute only has meaning for interrupt functions. */
6995 warning ("`%s' attribute only applies to interrupt functions",
6996 IDENTIFIER_POINTER (name));
6997 *no_add_attrs = true;
6999 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7001 /* The argument must be a constant string. */
7002 warning ("`%s' attribute argument not a string constant",
7003 IDENTIFIER_POINTER (name));
7004 *no_add_attrs = true;
7008 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7009 TREE_STRING_POINTER (TREE_VALUE (args)));
7015 /* Handle an "trap_exit" attribute; arguments as in
7016 struct attribute_spec.handler. */
7018 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7019 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7021 if (TREE_CODE (*node) != FUNCTION_DECL)
7023 warning ("`%s' attribute only applies to functions",
7024 IDENTIFIER_POINTER (name));
7025 *no_add_attrs = true;
7027 else if (!pragma_interrupt)
7029 /* The trap_exit attribute only has meaning for interrupt functions. */
7030 warning ("`%s' attribute only applies to interrupt functions",
7031 IDENTIFIER_POINTER (name));
7032 *no_add_attrs = true;
7034 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7036 /* The argument must be a constant integer. */
7037 warning ("`%s' attribute argument not an integer constant",
7038 IDENTIFIER_POINTER (name));
7039 *no_add_attrs = true;
7043 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7050 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7051 tree name ATTRIBUTE_UNUSED,
7052 tree args ATTRIBUTE_UNUSED,
7053 int flags ATTRIBUTE_UNUSED,
7054 bool *no_add_attrs ATTRIBUTE_UNUSED)
7059 /* True if __attribute__((renesas)) or -mrenesas. */
7061 sh_attr_renesas_p (tree td)
7068 td = TREE_TYPE (td);
7069 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7073 /* True if __attribute__((renesas)) or -mrenesas, for the current
7076 sh_cfun_attr_renesas_p (void)
7078 return sh_attr_renesas_p (current_function_decl);
7082 sh_cfun_interrupt_handler_p (void)
7084 return (lookup_attribute ("interrupt_handler",
7085 DECL_ATTRIBUTES (current_function_decl))
7089 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7092 const char *const name;
7094 const char *const description;
7096 sh_target_switches[] = TARGET_SWITCHES;
7097 #define target_switches sh_target_switches
7099 /* Like default_pch_valid_p, but take flag_mask into account. */
7101 sh_pch_valid_p (const void *data_p, size_t len)
7103 const char *data = (const char *)data_p;
7104 const char *flag_that_differs = NULL;
7108 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7109 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7111 /* -fpic and -fpie also usually make a PCH invalid. */
7112 if (data[0] != flag_pic)
7113 return _("created and used with different settings of -fpic");
7114 if (data[1] != flag_pie)
7115 return _("created and used with different settings of -fpie");
7118 /* Check target_flags. */
7119 memcpy (&old_flags, data, sizeof (target_flags));
7120 if (((old_flags ^ target_flags) & flag_mask) != 0)
7122 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7126 bits = target_switches[i].value;
7130 if ((target_flags & bits) != (old_flags & bits))
7132 flag_that_differs = target_switches[i].name;
7138 data += sizeof (target_flags);
7139 len -= sizeof (target_flags);
7141 /* Check string options. */
7142 #ifdef TARGET_OPTIONS
7143 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7145 const char *str = *target_options[i].variable;
7149 l = strlen (str) + 1;
7150 if (len < l || memcmp (data, str, l) != 0)
7152 flag_that_differs = target_options[i].prefix;
7165 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7168 return _("out of memory");
7173 /* Predicates used by the templates. */
7175 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7176 Used only in general_movsrc_operand. */
7179 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7191 /* Returns 1 if OP can be source of a simple move operation.
7192 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7193 invalid as are subregs of system registers. */
7196 general_movsrc_operand (rtx op, enum machine_mode mode)
7198 if (GET_CODE (op) == MEM)
7200 rtx inside = XEXP (op, 0);
7201 if (GET_CODE (inside) == CONST)
7202 inside = XEXP (inside, 0);
7204 if (GET_CODE (inside) == LABEL_REF)
7207 if (GET_CODE (inside) == PLUS
7208 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7209 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7212 /* Only post inc allowed. */
7213 if (GET_CODE (inside) == PRE_DEC)
7217 if ((mode == QImode || mode == HImode)
7218 && (GET_CODE (op) == SUBREG
7219 && GET_CODE (XEXP (op, 0)) == REG
7220 && system_reg_operand (XEXP (op, 0), mode)))
7223 return general_operand (op, mode);
7226 /* Returns 1 if OP can be a destination of a move.
7227 Same as general_operand, but no preinc allowed. */
7230 general_movdst_operand (rtx op, enum machine_mode mode)
7232 /* Only pre dec allowed. */
7233 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7236 return general_operand (op, mode);
7239 /* Returns 1 if OP is a normal arithmetic register. */
7242 arith_reg_operand (rtx op, enum machine_mode mode)
7244 if (register_operand (op, mode))
7248 if (GET_CODE (op) == REG)
7250 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7251 regno = REGNO (SUBREG_REG (op));
7255 return (regno != T_REG && regno != PR_REG
7256 && ! TARGET_REGISTER_P (regno)
7257 && (regno != FPUL_REG || TARGET_SH4)
7258 && regno != MACH_REG && regno != MACL_REG);
7263 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7264 because this would lead to missing sign extensions when truncating from
7265 DImode to SImode. */
7267 arith_reg_dest (rtx op, enum machine_mode mode)
7269 if (mode == DImode && GET_CODE (op) == SUBREG
7270 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7272 return arith_reg_operand (op, mode);
7276 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7278 enum machine_mode op_mode = GET_MODE (op);
7280 if (GET_MODE_CLASS (op_mode) != MODE_INT
7281 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7283 if (! reload_completed)
7285 return true_regnum (op) <= LAST_GENERAL_REG;
7289 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7291 if (register_operand (op, mode))
7295 if (GET_CODE (op) == REG)
7297 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7298 regno = REGNO (SUBREG_REG (op));
7302 return (regno >= FIRST_PSEUDO_REGISTER
7303 || FP_REGISTER_P (regno));
7308 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7311 arith_operand (rtx op, enum machine_mode mode)
7313 if (arith_reg_operand (op, mode))
7318 /* FIXME: We should be checking whether the CONST_INT fits in a
7319 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7320 attempting to transform a sequence of two 64-bit sets of the
7321 same register from literal constants into a set and an add,
7322 when the difference is too wide for an add. */
7323 if (GET_CODE (op) == CONST_INT
7324 || EXTRA_CONSTRAINT_C16 (op))
7329 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7335 /* Returns 1 if OP is a valid source operand for a compare insn. */
7338 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7340 if (arith_reg_operand (op, mode))
7343 if (EXTRA_CONSTRAINT_Z (op))
7349 /* Return 1 if OP is a valid source operand for an SHmedia operation
7350 that takes either a register or a 6-bit immediate. */
7353 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7355 return (arith_reg_operand (op, mode)
7356 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7359 /* Returns 1 if OP is a valid source operand for a logical operation. */
7362 logical_operand (rtx op, enum machine_mode mode)
7364 if (arith_reg_operand (op, mode))
7369 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7374 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7381 and_operand (rtx op, enum machine_mode mode)
7383 if (logical_operand (op, mode))
7386 /* Check mshflo.l / mshflhi.l opportunities. */
7389 && GET_CODE (op) == CONST_INT
7390 && CONST_OK_FOR_J16 (INTVAL (op)))
7396 /* Nonzero if OP is a floating point value with value 0.0. */
7399 fp_zero_operand (rtx op)
7403 if (GET_MODE (op) != SFmode)
7406 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7407 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7410 /* Nonzero if OP is a floating point value with value 1.0. */
7413 fp_one_operand (rtx op)
7417 if (GET_MODE (op) != SFmode)
7420 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7421 return REAL_VALUES_EQUAL (r, dconst1);
7424 /* For -m4 and -m4-single-only, mode switching is used. If we are
7425 compiling without -mfmovd, movsf_ie isn't taken into account for
7426 mode switching. We could check in machine_dependent_reorg for
7427 cases where we know we are in single precision mode, but there is
7428 interface to find that out during reload, so we must avoid
7429 choosing an fldi alternative during reload and thus failing to
7430 allocate a scratch register for the constant loading. */
7434 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7438 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7440 enum rtx_code code = GET_CODE (op);
7441 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7445 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7447 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7448 && GET_MODE (op) == PSImode);
7452 fpul_operand (rtx op, enum machine_mode mode)
7455 return fp_arith_reg_operand (op, mode);
7457 return (GET_CODE (op) == REG
7458 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7459 && GET_MODE (op) == mode);
7463 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7465 return (GET_CODE (op) == SYMBOL_REF);
7468 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7470 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7472 if (GET_CODE (op) != SYMBOL_REF)
7474 return SYMBOL_REF_TLS_MODEL (op);
7478 commutative_float_operator (rtx op, enum machine_mode mode)
7480 if (GET_MODE (op) != mode)
7482 switch (GET_CODE (op))
7494 noncommutative_float_operator (rtx op, enum machine_mode mode)
7496 if (GET_MODE (op) != mode)
7498 switch (GET_CODE (op))
7510 unary_float_operator (rtx op, enum machine_mode mode)
7512 if (GET_MODE (op) != mode)
7514 switch (GET_CODE (op))
7527 binary_float_operator (rtx op, enum machine_mode mode)
7529 if (GET_MODE (op) != mode)
7531 switch (GET_CODE (op))
7545 binary_logical_operator (rtx op, enum machine_mode mode)
7547 if (GET_MODE (op) != mode)
7549 switch (GET_CODE (op))
7562 equality_comparison_operator (rtx op, enum machine_mode mode)
7564 return ((mode == VOIDmode || GET_MODE (op) == mode)
7565 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7569 greater_comparison_operator (rtx op, enum machine_mode mode)
7571 if (mode != VOIDmode && GET_MODE (op) == mode)
7573 switch (GET_CODE (op))
7586 less_comparison_operator (rtx op, enum machine_mode mode)
7588 if (mode != VOIDmode && GET_MODE (op) == mode)
7590 switch (GET_CODE (op))
7602 /* Accept pseudos and branch target registers. */
7604 target_reg_operand (rtx op, enum machine_mode mode)
7607 || GET_MODE (op) != DImode)
7610 if (GET_CODE (op) == SUBREG)
7613 if (GET_CODE (op) != REG)
7616 /* We must protect ourselves from matching pseudos that are virtual
7617 register, because they will eventually be replaced with hardware
7618 registers that aren't branch-target registers. */
7619 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7620 || TARGET_REGISTER_P (REGNO (op)))
7626 /* Same as target_reg_operand, except that label_refs and symbol_refs
7627 are accepted before reload. */
7629 target_operand (rtx op, enum machine_mode mode)
7634 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7635 && EXTRA_CONSTRAINT_Csy (op))
7636 return ! reload_completed;
7638 return target_reg_operand (op, mode);
7642 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7646 if (GET_CODE (op) != CONST_INT)
7649 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7653 extend_reg_operand (rtx op, enum machine_mode mode)
7655 return (GET_CODE (op) == TRUNCATE
7657 : arith_reg_operand) (op, mode);
7661 trunc_hi_operand (rtx op, enum machine_mode mode)
7663 enum machine_mode op_mode = GET_MODE (op);
7665 if (op_mode != SImode && op_mode != DImode
7666 && op_mode != V4HImode && op_mode != V2SImode)
7668 return extend_reg_operand (op, mode);
7672 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7674 return (GET_CODE (op) == TRUNCATE
7676 : arith_reg_or_0_operand) (op, mode);
7680 general_extend_operand (rtx op, enum machine_mode mode)
7682 return (GET_CODE (op) == TRUNCATE
7684 : nonimmediate_operand) (op, mode);
7688 inqhi_operand (rtx op, enum machine_mode mode)
7690 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7693 /* Can't use true_regnum here because copy_cost wants to know about
7694 SECONDARY_INPUT_RELOAD_CLASS. */
7695 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7699 sh_rep_vec (rtx v, enum machine_mode mode)
7704 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7705 || (GET_MODE (v) != mode && mode != VOIDmode))
7707 i = XVECLEN (v, 0) - 2;
7708 x = XVECEXP (v, 0, i + 1);
7709 if (GET_MODE_UNIT_SIZE (mode) == 1)
7711 y = XVECEXP (v, 0, i);
7712 for (i -= 2; i >= 0; i -= 2)
7713 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7714 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7719 if (XVECEXP (v, 0, i) != x)
7724 /* Determine if V is a constant vector matching MODE with only one element
7725 that is not a sign extension. Two byte-sized elements count as one. */
7727 sh_1el_vec (rtx v, enum machine_mode mode)
7730 int i, last, least, sign_ix;
7733 if (GET_CODE (v) != CONST_VECTOR
7734 || (GET_MODE (v) != mode && mode != VOIDmode))
7736 /* Determine numbers of last and of least significant elements. */
7737 last = XVECLEN (v, 0) - 1;
7738 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7739 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7742 if (GET_MODE_UNIT_SIZE (mode) == 1)
7743 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7744 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7746 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7747 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7748 ? constm1_rtx : const0_rtx);
7749 i = XVECLEN (v, 0) - 1;
7751 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7758 sh_const_vec (rtx v, enum machine_mode mode)
7762 if (GET_CODE (v) != CONST_VECTOR
7763 || (GET_MODE (v) != mode && mode != VOIDmode))
7765 i = XVECLEN (v, 0) - 1;
7767 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7772 /* Return the destination address of a branch. */
7775 branch_dest (rtx branch)
7777 rtx dest = SET_SRC (PATTERN (branch));
7780 if (GET_CODE (dest) == IF_THEN_ELSE)
7781 dest = XEXP (dest, 1);
7782 dest = XEXP (dest, 0);
7783 dest_uid = INSN_UID (dest);
7784 return INSN_ADDRESSES (dest_uid);
7787 /* Return nonzero if REG is not used after INSN.
7788 We assume REG is a reload reg, and therefore does
7789 not live past labels. It may live past calls or jumps though. */
7791 reg_unused_after (rtx reg, rtx insn)
7796 /* If the reg is set by this instruction, then it is safe for our
7797 case. Disregard the case where this is a store to memory, since
7798 we are checking a register used in the store address. */
7799 set = single_set (insn);
7800 if (set && GET_CODE (SET_DEST (set)) != MEM
7801 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7804 while ((insn = NEXT_INSN (insn)))
7810 code = GET_CODE (insn);
7813 /* If this is a label that existed before reload, then the register
7814 if dead here. However, if this is a label added by reorg, then
7815 the register may still be live here. We can't tell the difference,
7816 so we just ignore labels completely. */
7817 if (code == CODE_LABEL)
7822 if (code == JUMP_INSN)
7825 /* If this is a sequence, we must handle them all at once.
7826 We could have for instance a call that sets the target register,
7827 and an insn in a delay slot that uses the register. In this case,
7828 we must return 0. */
7829 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7834 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7836 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7837 rtx set = single_set (this_insn);
7839 if (GET_CODE (this_insn) == CALL_INSN)
7841 else if (GET_CODE (this_insn) == JUMP_INSN)
7843 if (INSN_ANNULLED_BRANCH_P (this_insn))
7848 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7850 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7852 if (GET_CODE (SET_DEST (set)) != MEM)
7858 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7863 else if (code == JUMP_INSN)
7867 set = single_set (insn);
7868 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7870 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7871 return GET_CODE (SET_DEST (set)) != MEM;
7872 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7875 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7883 static GTY(()) rtx fpscr_rtx;
7885 get_fpscr_rtx (void)
7889 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7890 REG_USERVAR_P (fpscr_rtx) = 1;
7891 mark_user_reg (fpscr_rtx);
7893 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7894 mark_user_reg (fpscr_rtx);
7899 emit_sf_insn (rtx pat)
7905 emit_df_insn (rtx pat)
7911 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7913 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7917 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7919 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7924 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7926 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7930 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7932 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7936 /* ??? gcc does flow analysis strictly after common subexpression
7937 elimination. As a result, common subexpression elimination fails
7938 when there are some intervening statements setting the same register.
7939 If we did nothing about this, this would hurt the precision switching
7940 for SH4 badly. There is some cse after reload, but it is unable to
7941 undo the extra register pressure from the unused instructions, and
7942 it cannot remove auto-increment loads.
7944 A C code example that shows this flow/cse weakness for (at least) SH
7945 and sparc (as of gcc ss-970706) is this:
7959 So we add another pass before common subexpression elimination, to
7960 remove assignments that are dead due to a following assignment in the
7961 same basic block. */
7964 mark_use (rtx x, rtx *reg_set_block)
7970 code = GET_CODE (x);
7975 int regno = REGNO (x);
7976 int nregs = (regno < FIRST_PSEUDO_REGISTER
7977 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7981 reg_set_block[regno + nregs - 1] = 0;
7988 rtx dest = SET_DEST (x);
7990 if (GET_CODE (dest) == SUBREG)
7991 dest = SUBREG_REG (dest);
7992 if (GET_CODE (dest) != REG)
7993 mark_use (dest, reg_set_block);
7994 mark_use (SET_SRC (x), reg_set_block);
8001 const char *fmt = GET_RTX_FORMAT (code);
8003 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8006 mark_use (XEXP (x, i), reg_set_block);
8007 else if (fmt[i] == 'E')
8008 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8009 mark_use (XVECEXP (x, i, j), reg_set_block);
8016 static rtx get_free_reg (HARD_REG_SET);
8018 /* This function returns a register to use to load the address to load
8019 the fpscr from. Currently it always returns r1 or r7, but when we are
8020 able to use pseudo registers after combine, or have a better mechanism
8021 for choosing a register, it should be done here. */
8022 /* REGS_LIVE is the liveness information for the point for which we
8023 need this allocation. In some bare-bones exit blocks, r1 is live at the
8024 start. We can even have all of r0..r3 being live:
8025 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8026 INSN before which new insns are placed with will clobber the register
8027 we return. If a basic block consists only of setting the return value
8028 register to a pseudo and using that register, the return value is not
8029 live before or after this block, yet we we'll insert our insns right in
8033 get_free_reg (HARD_REG_SET regs_live)
8035 if (! TEST_HARD_REG_BIT (regs_live, 1))
8036 return gen_rtx_REG (Pmode, 1);
8038 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8039 there shouldn't be anything but a jump before the function end. */
8040 if (! TEST_HARD_REG_BIT (regs_live, 7))
8041 return gen_rtx_REG (Pmode, 7);
8046 /* This function will set the fpscr from memory.
8047 MODE is the mode we are setting it to. */
8049 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8051 enum attr_fp_mode fp_mode = mode;
8052 rtx addr_reg = get_free_reg (regs_live);
8054 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8055 emit_insn (gen_fpu_switch1 (addr_reg));
8057 emit_insn (gen_fpu_switch0 (addr_reg));
8060 /* Is the given character a logical line separator for the assembler? */
8061 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8062 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8066 sh_insn_length_adjustment (rtx insn)
8068 /* Instructions with unfilled delay slots take up an extra two bytes for
8069 the nop in the delay slot. */
8070 if (((GET_CODE (insn) == INSN
8071 && GET_CODE (PATTERN (insn)) != USE
8072 && GET_CODE (PATTERN (insn)) != CLOBBER)
8073 || GET_CODE (insn) == CALL_INSN
8074 || (GET_CODE (insn) == JUMP_INSN
8075 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8076 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8077 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8078 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8081 /* SH2e has a bug that prevents the use of annulled branches, so if
8082 the delay slot is not filled, we'll have to put a NOP in it. */
8083 if (sh_cpu == CPU_SH2E
8084 && GET_CODE (insn) == JUMP_INSN
8085 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8086 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8087 && get_attr_type (insn) == TYPE_CBRANCH
8088 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8091 /* sh-dsp parallel processing insn take four bytes instead of two. */
8093 if (GET_CODE (insn) == INSN)
8096 rtx body = PATTERN (insn);
8097 const char *template;
8099 int maybe_label = 1;
8101 if (GET_CODE (body) == ASM_INPUT)
8102 template = XSTR (body, 0);
8103 else if (asm_noperands (body) >= 0)
8105 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8114 while (c == ' ' || c == '\t');
8115 /* all sh-dsp parallel-processing insns start with p.
8116 The only non-ppi sh insn starting with p is pref.
8117 The only ppi starting with pr is prnd. */
8118 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8120 /* The repeat pseudo-insn expands two three insns, a total of
8121 six bytes in size. */
8122 else if ((c == 'r' || c == 'R')
8123 && ! strncasecmp ("epeat", template, 5))
8125 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8127 /* If this is a label, it is obviously not a ppi insn. */
8128 if (c == ':' && maybe_label)
8133 else if (c == '\'' || c == '"')
8138 maybe_label = c != ':';
8146 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8147 isn't protected by a PIC unspec. */
8149 nonpic_symbol_mentioned_p (rtx x)
8151 register const char *fmt;
8154 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8155 || GET_CODE (x) == PC)
8158 /* We don't want to look into the possible MEM location of a
8159 CONST_DOUBLE, since we're not going to use it, in general. */
8160 if (GET_CODE (x) == CONST_DOUBLE)
8163 if (GET_CODE (x) == UNSPEC
8164 && (XINT (x, 1) == UNSPEC_PIC
8165 || XINT (x, 1) == UNSPEC_GOT
8166 || XINT (x, 1) == UNSPEC_GOTOFF
8167 || XINT (x, 1) == UNSPEC_GOTPLT
8168 || XINT (x, 1) == UNSPEC_GOTTPOFF
8169 || XINT (x, 1) == UNSPEC_DTPOFF
8170 || XINT (x, 1) == UNSPEC_PLT))
8173 fmt = GET_RTX_FORMAT (GET_CODE (x));
8174 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8180 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8181 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8184 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8191 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8192 @GOTOFF in `reg'. */
8194 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8197 if (tls_symbolic_operand (orig, Pmode))
8200 if (GET_CODE (orig) == LABEL_REF
8201 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8204 reg = gen_reg_rtx (Pmode);
8206 emit_insn (gen_symGOTOFF2reg (reg, orig));
8209 else if (GET_CODE (orig) == SYMBOL_REF)
8212 reg = gen_reg_rtx (Pmode);
8214 emit_insn (gen_symGOT2reg (reg, orig));
8220 /* Mark the use of a constant in the literal table. If the constant
8221 has multiple labels, make it unique. */
8223 mark_constant_pool_use (rtx x)
8225 rtx insn, lab, pattern;
8230 switch (GET_CODE (x))
8240 /* Get the first label in the list of labels for the same constant
8241 and delete another labels in the list. */
8243 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8245 if (GET_CODE (insn) != CODE_LABEL
8246 || LABEL_REFS (insn) != NEXT_INSN (insn))
8251 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8252 INSN_DELETED_P (insn) = 1;
8254 /* Mark constants in a window. */
8255 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8257 if (GET_CODE (insn) != INSN)
8260 pattern = PATTERN (insn);
8261 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8264 switch (XINT (pattern, 1))
8266 case UNSPECV_CONST2:
8267 case UNSPECV_CONST4:
8268 case UNSPECV_CONST8:
8269 XVECEXP (pattern, 0, 1) = const1_rtx;
8271 case UNSPECV_WINDOW_END:
8272 if (XVECEXP (pattern, 0, 0) == x)
8275 case UNSPECV_CONST_END:
8285 /* Return true if it's possible to redirect BRANCH1 to the destination
8286 of an unconditional jump BRANCH2. We only want to do this if the
8287 resulting branch will have a short displacement. */
8289 sh_can_redirect_branch (rtx branch1, rtx branch2)
8291 if (flag_expensive_optimizations && simplejump_p (branch2))
8293 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8297 for (distance = 0, insn = NEXT_INSN (branch1);
8298 insn && distance < 256;
8299 insn = PREV_INSN (insn))
8304 distance += get_attr_length (insn);
8306 for (distance = 0, insn = NEXT_INSN (branch1);
8307 insn && distance < 256;
8308 insn = NEXT_INSN (insn))
8313 distance += get_attr_length (insn);
8319 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8321 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8322 unsigned int new_reg)
8324 /* Interrupt functions can only use registers that have already been
8325 saved by the prologue, even if they would normally be
8328 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8334 /* Function to update the integer COST
8335 based on the relationship between INSN that is dependent on
8336 DEP_INSN through the dependence LINK. The default is to make no
8337 adjustment to COST. This can be used for example to specify to
8338 the scheduler that an output- or anti-dependence does not incur
8339 the same cost as a data-dependence. The return value should be
8340 the new value for COST. */
8342 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8348 /* On SHmedia, if the dependence is an anti-dependence or
8349 output-dependence, there is no cost. */
8350 if (REG_NOTE_KIND (link) != 0)
8353 if (get_attr_is_mac_media (insn)
8354 && get_attr_is_mac_media (dep_insn))
8357 else if (REG_NOTE_KIND (link) == 0)
8359 enum attr_type dep_type, type;
8361 if (recog_memoized (insn) < 0
8362 || recog_memoized (dep_insn) < 0)
8365 dep_type = get_attr_type (dep_insn);
8366 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8368 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8369 && (type = get_attr_type (insn)) != TYPE_CALL
8370 && type != TYPE_SFUNC)
8373 /* The only input for a call that is timing-critical is the
8374 function's address. */
8375 if (GET_CODE(insn) == CALL_INSN)
8377 rtx call = PATTERN (insn);
8379 if (GET_CODE (call) == PARALLEL)
8380 call = XVECEXP (call, 0 ,0);
8381 if (GET_CODE (call) == SET)
8382 call = SET_SRC (call);
8383 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8384 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8387 /* Likewise, the most timing critical input for an sfuncs call
8388 is the function address. However, sfuncs typically start
8389 using their arguments pretty quickly.
8390 Assume a four cycle delay before they are needed. */
8391 /* All sfunc calls are parallels with at least four components.
8392 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8393 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8394 && XVECLEN (PATTERN (insn), 0) >= 4
8395 && (reg = sfunc_uses_reg (insn)))
8397 if (! reg_set_p (reg, dep_insn))
8400 /* When the preceding instruction loads the shift amount of
8401 the following SHAD/SHLD, the latency of the load is increased
8404 && get_attr_type (insn) == TYPE_DYN_SHIFT
8405 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8406 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8407 XEXP (SET_SRC (single_set (insn)),
8410 /* When an LS group instruction with a latency of less than
8411 3 cycles is followed by a double-precision floating-point
8412 instruction, FIPR, or FTRV, the latency of the first
8413 instruction is increased to 3 cycles. */
8415 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8416 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8418 /* The lsw register of a double-precision computation is ready one
8420 else if (reload_completed
8421 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8422 && (use_pat = single_set (insn))
8423 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8427 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8428 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8431 /* An anti-dependence penalty of two applies if the first insn is a double
8432 precision fadd / fsub / fmul. */
8433 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8434 && recog_memoized (dep_insn) >= 0
8435 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8436 /* A lot of alleged anti-flow dependences are fake,
8437 so check this one is real. */
8438 && flow_dependent_p (dep_insn, insn))
8445 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8446 if DEP_INSN is anti-flow dependent on INSN. */
8448 flow_dependent_p (rtx insn, rtx dep_insn)
8450 rtx tmp = PATTERN (insn);
8452 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8453 return tmp == NULL_RTX;
8456 /* A helper function for flow_dependent_p called through note_stores. */
8458 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8460 rtx * pinsn = (rtx *) data;
8462 if (*pinsn && reg_referenced_p (x, *pinsn))
8466 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8467 'special function' patterns (type sfunc) that clobber pr, but that
8468 do not look like function calls to leaf_function_p. Hence we must
8469 do this extra check. */
8473 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8476 /* This Function returns nonzero if the DFA based scheduler interface
8477 is to be used. At present this is supported for the SH4 only. */
8479 sh_use_dfa_interface (void)
8481 if (TARGET_HARD_SH4)
8487 /* This function returns "2" to indicate dual issue for the SH4
8488 processor. To be used by the DFA pipeline description. */
8490 sh_issue_rate (void)
8492 if (TARGET_SUPERSCALAR)
8498 /* Functions for ready queue reordering for sched1. */
8500 /* Get weight for mode for a set x. */
8502 find_set_regmode_weight (rtx x, enum machine_mode mode)
8504 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8506 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8508 if (GET_CODE (SET_DEST (x)) == REG)
8510 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8520 /* Get regmode weight for insn. */
8522 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8524 short reg_weight = 0;
8527 /* Increment weight for each register born here. */
8529 reg_weight += find_set_regmode_weight (x, mode);
8530 if (GET_CODE (x) == PARALLEL)
8533 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8535 x = XVECEXP (PATTERN (insn), 0, j);
8536 reg_weight += find_set_regmode_weight (x, mode);
8539 /* Decrement weight for each register that dies here. */
8540 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8542 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8544 rtx note = XEXP (x, 0);
8545 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8552 /* Calculate regmode weights for all insns of a basic block. */
8554 find_regmode_weight (int b, enum machine_mode mode)
8556 rtx insn, next_tail, head, tail;
8558 get_block_head_tail (b, &head, &tail);
8559 next_tail = NEXT_INSN (tail);
8561 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8563 /* Handle register life information. */
8568 INSN_REGMODE_WEIGHT (insn, mode) =
8569 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8570 else if (mode == SImode)
8571 INSN_REGMODE_WEIGHT (insn, mode) =
8572 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8576 /* Comparison function for ready queue sorting. */
8578 rank_for_reorder (const void *x, const void *y)
8580 rtx tmp = *(const rtx *) y;
8581 rtx tmp2 = *(const rtx *) x;
8583 /* The insn in a schedule group should be issued the first. */
8584 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8585 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8587 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8588 minimizes instruction movement, thus minimizing sched's effect on
8589 register pressure. */
8590 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8593 /* Resort the array A in which only element at index N may be out of order. */
8595 swap_reorder (rtx *a, int n)
8597 rtx insn = a[n - 1];
8600 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8608 #define SCHED_REORDER(READY, N_READY) \
8611 if ((N_READY) == 2) \
8612 swap_reorder (READY, N_READY); \
8613 else if ((N_READY) > 2) \
8614 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8618 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8621 ready_reorder (rtx *ready, int nready)
8623 SCHED_REORDER (ready, nready);
8626 /* Calculate regmode weights for all insns of all basic block. */
8628 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8629 int verbose ATTRIBUTE_UNUSED,
8634 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8635 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8637 FOR_EACH_BB_REVERSE (b)
8639 find_regmode_weight (b->index, SImode);
8640 find_regmode_weight (b->index, SFmode);
8643 CURR_REGMODE_PRESSURE (SImode) = 0;
8644 CURR_REGMODE_PRESSURE (SFmode) = 0;
8650 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8651 int verbose ATTRIBUTE_UNUSED)
8653 if (regmode_weight[0])
8655 free (regmode_weight[0]);
8656 regmode_weight[0] = NULL;
8658 if (regmode_weight[1])
8660 free (regmode_weight[1]);
8661 regmode_weight[1] = NULL;
8665 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8666 keep count of register pressures on SImode and SFmode. */
8668 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8669 int sched_verbose ATTRIBUTE_UNUSED,
8673 if (GET_CODE (PATTERN (insn)) != USE
8674 && GET_CODE (PATTERN (insn)) != CLOBBER)
8675 cached_can_issue_more = can_issue_more - 1;
8677 cached_can_issue_more = can_issue_more;
8679 if (reload_completed)
8680 return cached_can_issue_more;
8682 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8683 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8685 return cached_can_issue_more;
8689 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8690 int verbose ATTRIBUTE_UNUSED,
8691 int veclen ATTRIBUTE_UNUSED)
8693 CURR_REGMODE_PRESSURE (SImode) = 0;
8694 CURR_REGMODE_PRESSURE (SFmode) = 0;
8697 /* Some magic numbers. */
8698 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8699 functions that already have high pressure on r0. */
8700 #define R0_MAX_LIFE_REGIONS 2
8701 #define R0_MAX_LIVE_LENGTH 12
8702 /* Register Pressure thresholds for SImode and SFmode registers. */
8703 #define SIMODE_MAX_WEIGHT 5
8704 #define SFMODE_MAX_WEIGHT 10
8706 /* Return true if the pressure is high for MODE. */
8708 high_pressure (enum machine_mode mode)
8710 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8711 functions that already have high pressure on r0. */
8712 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8713 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8717 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8719 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8722 /* Reorder ready queue if register pressure is high. */
8724 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8725 int sched_verbose ATTRIBUTE_UNUSED,
8728 int clock_var ATTRIBUTE_UNUSED)
8730 if (reload_completed)
8731 return sh_issue_rate ();
8733 if (high_pressure (SFmode) || high_pressure (SImode))
8735 ready_reorder (ready, *n_readyp);
8738 return sh_issue_rate ();
8741 /* Skip cycles if the current register pressure is high. */
8743 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8744 int sched_verbose ATTRIBUTE_UNUSED,
8745 rtx *ready ATTRIBUTE_UNUSED,
8746 int *n_readyp ATTRIBUTE_UNUSED,
8747 int clock_var ATTRIBUTE_UNUSED)
8749 if (reload_completed)
8750 return cached_can_issue_more;
8752 if (high_pressure(SFmode) || high_pressure (SImode))
8755 return cached_can_issue_more;
8758 /* Skip cycles without sorting the ready queue. This will move insn from
8759 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8760 queue by sh_reorder. */
8762 /* Generally, skipping these many cycles are sufficient for all insns to move
8767 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8768 int sched_verbose ATTRIBUTE_UNUSED,
8769 rtx insn ATTRIBUTE_UNUSED,
8774 if (reload_completed)
8779 if ((clock_var - last_clock_var) < MAX_SKIPS)
8784 /* If this is the last cycle we are skipping, allow reordering of R. */
8785 if ((clock_var - last_clock_var) == MAX_SKIPS)
8797 /* SHmedia requires registers for branches, so we can't generate new
8798 branches past reload. */
8800 sh_cannot_modify_jumps_p (void)
8802 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8806 sh_target_reg_class (void)
8808 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8812 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8814 return (shmedia_space_reserved_for_target_registers
8815 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8819 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8821 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8825 On the SH1..SH4, the trampoline looks like
8826 2 0002 D202 mov.l l2,r2
8827 1 0000 D301 mov.l l1,r3
8830 5 0008 00000000 l1: .long area
8831 6 000c 00000000 l2: .long function
8833 SH5 (compact) uses r1 instead of r3 for the static chain. */
8836 /* Emit RTL insns to initialize the variable parts of a trampoline.
8837 FNADDR is an RTX for the address of the function's pure code.
8838 CXT is an RTX for the static chain value for the function. */
8841 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8843 if (TARGET_SHMEDIA64)
8848 rtx movi1 = GEN_INT (0xcc000010);
8849 rtx shori1 = GEN_INT (0xc8000010);
8852 /* The following trampoline works within a +- 128 KB range for cxt:
8853 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8854 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8855 gettr tr1,r1; blink tr0,r63 */
8856 /* Address rounding makes it hard to compute the exact bounds of the
8857 offset for this trampoline, but we have a rather generous offset
8858 range, so frame_offset should do fine as an upper bound. */
8859 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8861 /* ??? could optimize this trampoline initialization
8862 by writing DImode words with two insns each. */
8863 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8864 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8865 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8866 insn = gen_rtx_AND (DImode, insn, mask);
8867 /* Or in ptb/u .,tr1 pattern */
8868 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8869 insn = force_operand (insn, NULL_RTX);
8870 insn = gen_lowpart (SImode, insn);
8871 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8872 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8873 insn = gen_rtx_AND (DImode, insn, mask);
8874 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8875 insn = gen_lowpart (SImode, insn);
8876 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8877 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8878 insn = gen_rtx_AND (DImode, insn, mask);
8879 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8880 insn = gen_lowpart (SImode, insn);
8881 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8882 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8883 insn = gen_rtx_AND (DImode, insn, mask);
8884 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8885 insn = gen_lowpart (SImode, insn);
8886 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8888 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8889 insn = gen_rtx_AND (DImode, insn, mask);
8890 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8891 insn = gen_lowpart (SImode, insn);
8892 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8894 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8895 GEN_INT (0x6bf10600));
8896 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8897 GEN_INT (0x4415fc10));
8898 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8899 GEN_INT (0x4401fff0));
8900 emit_insn (gen_ic_invalidate_line (tramp));
8903 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8904 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8906 tramp_templ = gen_datalabel_ref (tramp_templ);
8907 dst = gen_rtx_MEM (BLKmode, tramp);
8908 src = gen_rtx_MEM (BLKmode, tramp_templ);
8909 set_mem_align (dst, 256);
8910 set_mem_align (src, 64);
8911 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8913 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8915 emit_move_insn (gen_rtx_MEM (Pmode,
8916 plus_constant (tramp,
8918 + GET_MODE_SIZE (Pmode))),
8920 emit_insn (gen_ic_invalidate_line (tramp));
8923 else if (TARGET_SHMEDIA)
8925 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8926 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8927 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8928 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8929 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8930 rotated 10 right, and higher 16 bit of every 32 selected. */
8932 = force_reg (V2HImode, (simplify_gen_subreg
8933 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8934 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8935 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8937 tramp = force_reg (Pmode, tramp);
8938 fnaddr = force_reg (SImode, fnaddr);
8939 cxt = force_reg (SImode, cxt);
8940 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8941 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8943 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8944 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8945 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8946 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8947 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8948 gen_rtx_SUBREG (V2HImode, cxt, 0),
8950 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8951 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8952 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8953 if (TARGET_LITTLE_ENDIAN)
8955 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8956 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8960 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8961 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8963 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8964 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8965 emit_insn (gen_ic_invalidate_line (tramp));
8968 else if (TARGET_SHCOMPACT)
8970 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8973 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8974 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8976 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8977 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8979 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8981 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8985 if (TARGET_USERMODE)
8986 emit_library_call (function_symbol ("__ic_invalidate"),
8987 0, VOIDmode, 1, tramp, SImode);
8989 emit_insn (gen_ic_invalidate_line (tramp));
8993 /* FIXME: This is overly conservative. A SHcompact function that
8994 receives arguments ``by reference'' will have them stored in its
8995 own stack frame, so it must not pass pointers or references to
8996 these arguments to other functions by means of sibling calls. */
8998 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9001 && (! TARGET_SHCOMPACT
9002 || current_function_args_info.stack_regs == 0)
9003 && ! sh_cfun_interrupt_handler_p ());
9006 /* Machine specific built-in functions. */
9008 struct builtin_description
9010 const enum insn_code icode;
9011 const char *const name;
9015 /* describe number and signedness of arguments; arg[0] == result
9016 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9017 static const char signature_args[][4] =
9019 #define SH_BLTIN_V2SI2 0
9021 #define SH_BLTIN_V4HI2 1
9023 #define SH_BLTIN_V2SI3 2
9025 #define SH_BLTIN_V4HI3 3
9027 #define SH_BLTIN_V8QI3 4
9029 #define SH_BLTIN_MAC_HISI 5
9031 #define SH_BLTIN_SH_HI 6
9033 #define SH_BLTIN_SH_SI 7
9035 #define SH_BLTIN_V4HI2V2SI 8
9037 #define SH_BLTIN_V4HI2V8QI 9
9039 #define SH_BLTIN_SISF 10
9041 #define SH_BLTIN_LDUA_L 11
9043 #define SH_BLTIN_LDUA_Q 12
9045 #define SH_BLTIN_STUA_L 13
9047 #define SH_BLTIN_STUA_Q 14
9049 #define SH_BLTIN_UDI 15
9051 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9052 #define SH_BLTIN_2 16
9053 #define SH_BLTIN_SU 16
9055 #define SH_BLTIN_3 17
9056 #define SH_BLTIN_SUS 17
9058 #define SH_BLTIN_PSSV 18
9060 #define SH_BLTIN_XXUU 19
9061 #define SH_BLTIN_UUUU 19
9063 #define SH_BLTIN_PV 20
9066 /* mcmv: operands considered unsigned. */
9067 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9068 /* mperm: control value considered unsigned int. */
9069 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9070 /* mshards_q: returns signed short. */
9071 /* nsb: takes long long arg, returns unsigned char. */
9072 static const struct builtin_description bdesc[] =
9074 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9075 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9076 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9077 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9078 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9079 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9080 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9082 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9083 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9085 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9086 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9087 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9088 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9089 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9090 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9091 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9092 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9093 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9094 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9095 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9096 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9097 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9098 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9099 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9100 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9101 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9102 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9103 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9104 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9105 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9106 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9107 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9108 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9109 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9110 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9111 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9112 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9113 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9114 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9115 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9116 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9117 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9118 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9119 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9120 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9121 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9122 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9123 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9124 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9125 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9126 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9127 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9128 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9129 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9130 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9131 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9132 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9133 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9134 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9135 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9136 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9137 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9138 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9140 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9141 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9142 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9143 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9144 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9145 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9146 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9147 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9148 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9149 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9150 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9151 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9152 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9153 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9154 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9155 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9157 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9158 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9160 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9161 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9166 sh_media_init_builtins (void)
9168 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9169 const struct builtin_description *d;
9171 memset (shared, 0, sizeof shared);
9172 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9174 tree type, arg_type;
9175 int signature = d->signature;
9178 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9179 type = shared[signature];
9182 int has_result = signature_args[signature][0] != 0;
9184 if (signature_args[signature][1] == 8
9185 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9187 if (! TARGET_FPU_ANY
9188 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9190 type = void_list_node;
9193 int arg = signature_args[signature][i];
9194 int opno = i - 1 + has_result;
9197 arg_type = ptr_type_node;
9199 arg_type = ((*lang_hooks.types.type_for_mode)
9200 (insn_data[d->icode].operand[opno].mode,
9205 arg_type = void_type_node;
9208 type = tree_cons (NULL_TREE, arg_type, type);
9210 type = build_function_type (arg_type, type);
9211 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9212 shared[signature] = type;
9214 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9220 sh_init_builtins (void)
9223 sh_media_init_builtins ();
9226 /* Expand an expression EXP that calls a built-in function,
9227 with result going to TARGET if that's convenient
9228 (and in mode MODE if that's convenient).
9229 SUBTARGET may be used as the target for computing one of EXP's operands.
9230 IGNORE is nonzero if the value is to be ignored. */
9233 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9234 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9236 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9237 tree arglist = TREE_OPERAND (exp, 1);
9238 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9239 const struct builtin_description *d = &bdesc[fcode];
9240 enum insn_code icode = d->icode;
9241 int signature = d->signature;
9242 enum machine_mode tmode = VOIDmode;
9247 if (signature_args[signature][0])
9252 tmode = insn_data[icode].operand[0].mode;
9254 || GET_MODE (target) != tmode
9255 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9256 target = gen_reg_rtx (tmode);
9262 for (i = 1; i <= 3; i++, nop++)
9265 enum machine_mode opmode, argmode;
9267 if (! signature_args[signature][i])
9269 arg = TREE_VALUE (arglist);
9270 if (arg == error_mark_node)
9272 arglist = TREE_CHAIN (arglist);
9273 opmode = insn_data[icode].operand[nop].mode;
9274 argmode = TYPE_MODE (TREE_TYPE (arg));
9275 if (argmode != opmode)
9276 arg = build1 (NOP_EXPR,
9277 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9278 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9279 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9280 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9286 pat = (*insn_data[d->icode].genfun) (op[0]);
9289 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9292 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9295 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9307 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9309 rtx sel0 = const0_rtx;
9310 rtx sel1 = const1_rtx;
9311 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9312 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9314 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9315 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9319 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9321 rtx sel0 = const0_rtx;
9322 rtx sel1 = const1_rtx;
9323 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9325 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9327 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9328 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9331 /* Return the class of registers for which a mode change from FROM to TO
9334 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9335 enum reg_class class)
9337 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9339 if (TARGET_LITTLE_ENDIAN)
9341 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9342 return reg_classes_intersect_p (DF_REGS, class);
9346 if (GET_MODE_SIZE (from) < 8)
9347 return reg_classes_intersect_p (DF_HI_REGS, class);
9354 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9355 that label is used. */
9358 sh_mark_label (rtx address, int nuses)
9360 if (GOTOFF_P (address))
9362 /* Extract the label or symbol. */
9363 address = XEXP (address, 0);
9364 if (GET_CODE (address) == PLUS)
9365 address = XEXP (address, 0);
9366 address = XVECEXP (address, 0, 0);
9368 if (GET_CODE (address) == LABEL_REF
9369 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9370 LABEL_NUSES (XEXP (address, 0)) += nuses;
9373 /* Compute extra cost of moving data between one register class
9376 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9377 uses this information. Hence, the general register <-> floating point
9378 register information here is not used for SFmode. */
9381 sh_register_move_cost (enum machine_mode mode,
9382 enum reg_class srcclass, enum reg_class dstclass)
9384 if (dstclass == T_REGS || dstclass == PR_REGS)
9387 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9390 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9391 && REGCLASS_HAS_FP_REG (srcclass)
9392 && REGCLASS_HAS_FP_REG (dstclass))
9395 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9396 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9399 if ((REGCLASS_HAS_FP_REG (dstclass)
9400 && REGCLASS_HAS_GENERAL_REG (srcclass))
9401 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9402 && REGCLASS_HAS_FP_REG (srcclass)))
9403 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9404 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9406 if ((dstclass == FPUL_REGS
9407 && REGCLASS_HAS_GENERAL_REG (srcclass))
9408 || (srcclass == FPUL_REGS
9409 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9412 if ((dstclass == FPUL_REGS
9413 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9414 || (srcclass == FPUL_REGS
9415 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9418 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9419 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9422 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9423 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9428 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9429 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9430 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9432 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9435 /* Like register_operand, but take into account that SHMEDIA can use
9436 the constant zero like a general register. */
9438 sh_register_operand (rtx op, enum machine_mode mode)
9440 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9442 return register_operand (op, mode);
9446 cmpsi_operand (rtx op, enum machine_mode mode)
9448 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9449 && GET_MODE (op) == SImode)
9451 return arith_operand (op, mode);
9454 static rtx emit_load_ptr (rtx, rtx);
9457 emit_load_ptr (rtx reg, rtx addr)
9459 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9461 if (Pmode != ptr_mode)
9462 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9463 return emit_move_insn (reg, mem);
9467 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9468 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9471 CUMULATIVE_ARGS cum;
9472 int structure_value_byref = 0;
9473 rtx this, this_value, sibcall, insns, funexp;
9474 tree funtype = TREE_TYPE (function);
9475 int simple_add = CONST_OK_FOR_ADD (delta);
9477 rtx scratch0, scratch1, scratch2;
9479 reload_completed = 1;
9480 epilogue_completed = 1;
9482 current_function_uses_only_leaf_regs = 1;
9484 emit_note (NOTE_INSN_PROLOGUE_END);
9486 /* Find the "this" pointer. We have such a wide range of ABIs for the
9487 SH that it's best to do this completely machine independently.
9488 "this" is passed as first argument, unless a structure return pointer
9489 comes first, in which case "this" comes second. */
9490 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9491 #ifndef PCC_STATIC_STRUCT_RETURN
9492 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9493 structure_value_byref = 1;
9494 #endif /* not PCC_STATIC_STRUCT_RETURN */
9495 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9497 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9499 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9501 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9503 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9504 static chain pointer (even if you can't have nested virtual functions
9505 right now, someone might implement them sometime), and the rest of the
9506 registers are used for argument passing, are callee-saved, or reserved. */
9507 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9510 scratch1 = gen_rtx_REG (ptr_mode, 1);
9511 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9512 pointing where to return struct values. */
9513 scratch2 = gen_rtx_REG (Pmode, 3);
9515 else if (TARGET_SHMEDIA)
9517 scratch1 = gen_rtx_REG (ptr_mode, 21);
9518 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9521 this_value = plus_constant (this, delta);
9523 && (simple_add || scratch0 != scratch1)
9524 && strict_memory_address_p (ptr_mode, this_value))
9526 emit_load_ptr (scratch0, this_value);
9532 else if (simple_add)
9533 emit_move_insn (this, this_value);
9536 emit_move_insn (scratch1, GEN_INT (delta));
9537 emit_insn (gen_add2_insn (this, scratch1));
9545 emit_load_ptr (scratch0, this);
9547 offset_addr = plus_constant (scratch0, vcall_offset);
9548 if (strict_memory_address_p (ptr_mode, offset_addr))
9550 else if (! TARGET_SH5)
9552 /* scratch0 != scratch1, and we have indexed loads. Get better
9553 schedule by loading the offset into r1 and using an indexed
9554 load - then the load of r1 can issue before the load from
9555 (this + delta) finishes. */
9556 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9557 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9559 else if (CONST_OK_FOR_ADD (vcall_offset))
9561 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9562 offset_addr = scratch0;
9564 else if (scratch0 != scratch1)
9566 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9567 emit_insn (gen_add2_insn (scratch0, scratch1));
9568 offset_addr = scratch0;
9571 abort (); /* FIXME */
9572 emit_load_ptr (scratch0, offset_addr);
9574 if (Pmode != ptr_mode)
9575 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9576 emit_insn (gen_add2_insn (this, scratch0));
9579 /* Generate a tail call to the target function. */
9580 if (! TREE_USED (function))
9582 assemble_external (function);
9583 TREE_USED (function) = 1;
9585 funexp = XEXP (DECL_RTL (function), 0);
9586 emit_move_insn (scratch2, funexp);
9587 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9588 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9589 SIBLING_CALL_P (sibcall) = 1;
9590 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9593 /* Run just enough of rest_of_compilation to do scheduling and get
9594 the insns emitted. Note that use_thunk calls
9595 assemble_start_function and assemble_end_function. */
9597 insn_locators_initialize ();
9598 insns = get_insns ();
9600 if (optimize > 0 && flag_schedule_insns_after_reload)
9602 find_basic_blocks (insns, max_reg_num (), dump_file);
9603 life_analysis (dump_file, PROP_FINAL);
9605 split_all_insns (1);
9607 schedule_insns (dump_file);
9612 if (optimize > 0 && flag_delayed_branch)
9613 dbr_schedule (insns, dump_file);
9614 shorten_branches (insns);
9615 final_start_function (insns, file, 1);
9616 final (insns, file, 1, 0);
9617 final_end_function ();
9619 if (optimize > 0 && flag_schedule_insns_after_reload)
9621 /* Release all memory allocated by flow. */
9622 free_basic_block_vars ();
9624 /* Release all memory held by regsets now. */
9625 regset_release_memory ();
9628 reload_completed = 0;
9629 epilogue_completed = 0;
9634 function_symbol (const char *name)
9636 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9637 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9641 /* Find the number of a general purpose register in S. */
9643 scavenge_reg (HARD_REG_SET *s)
9646 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9647 if (TEST_HARD_REG_BIT (*s, r))
9653 sh_get_pr_initial_val (void)
9657 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9658 PR register on SHcompact, because it might be clobbered by the prologue.
9659 We check first if that is known to be the case. */
9660 if (TARGET_SHCOMPACT
9661 && ((current_function_args_info.call_cookie
9662 & ~ CALL_COOKIE_RET_TRAMP (1))
9663 || current_function_has_nonlocal_label))
9664 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9666 /* If we haven't finished rtl generation, there might be a nonlocal label
9667 that we haven't seen yet.
9668 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9669 is set, unless it has been called before for the same register. And even
9670 then, we end in trouble if we didn't use the register in the same
9671 basic block before. So call get_hard_reg_initial_val now and wrap it
9672 in an unspec if we might need to replace it. */
9673 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9674 combine can put the pseudo returned by get_hard_reg_initial_val into
9675 instructions that need a general purpose registers, which will fail to
9676 be recognized when the pseudo becomes allocated to PR. */
9678 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9680 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9685 sh_expand_t_scc (enum rtx_code code, rtx target)
9687 rtx result = target;
9690 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9691 || GET_CODE (sh_compare_op1) != CONST_INT)
9693 if (GET_CODE (result) != REG)
9694 result = gen_reg_rtx (SImode);
9695 val = INTVAL (sh_compare_op1);
9696 if ((code == EQ && val == 1) || (code == NE && val == 0))
9697 emit_insn (gen_movt (result));
9698 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9700 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9701 emit_insn (gen_subc (result, result, result));
9702 emit_insn (gen_addsi3 (result, result, const1_rtx));
9704 else if (code == EQ || code == NE)
9705 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9708 if (result != target)
9709 emit_move_insn (target, result);
9713 /* INSN is an sfunc; return the rtx that describes the address used. */
9715 extract_sfunc_addr (rtx insn)
9717 rtx pattern, part = NULL_RTX;
9720 pattern = PATTERN (insn);
9721 len = XVECLEN (pattern, 0);
9722 for (i = 0; i < len; i++)
9724 part = XVECEXP (pattern, 0, i);
9725 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9726 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9727 return XEXP (part, 0);
9729 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9730 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9734 /* Verify that the register in use_sfunc_addr still agrees with the address
9735 used in the sfunc. This prevents fill_slots_from_thread from changing
9737 INSN is the use_sfunc_addr instruction, and REG is the register it
9740 check_use_sfunc_addr (rtx insn, rtx reg)
9742 /* Search for the sfunc. It should really come right after INSN. */
9743 while ((insn = NEXT_INSN (insn)))
9745 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9747 if (! INSN_P (insn))
9750 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9751 insn = XVECEXP (PATTERN (insn), 0, 0);
9752 if (GET_CODE (PATTERN (insn)) != PARALLEL
9753 || get_attr_type (insn) != TYPE_SFUNC)
9755 return rtx_equal_p (extract_sfunc_addr (insn), reg);