1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
/* Expose the insn code for indirect_jump_scratch so other passes can
   recognize such insns without pulling in the generated insn-code table.  */
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Word index of the most/least significant word of a double-word value:
   on little-endian targets the MSW is word 1, on big-endian word 0.  */
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
62 /* These are some macros to abstract register modes. */
/* Immediate-add range check: 10-bit signed on SHmedia, 8-bit otherwise.  */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* Pointer-sized move/add/sub generators: DImode on 64-bit SHmedia,
   SImode everywhere else.  */
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
/* NOTE(review): the declaration this comment describes is not visible in
   this listing -- confirm against the full file.  */
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
/* NOTE(review): trap_exit's declaration is not visible in this listing.  */
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
/* NOTE(review): sp_switch's declaration is not visible in this listing.  */
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
   interrupted.  */
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which cpu are we scheduling for. */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
/* Index 0 tracks SImode, index 1 SFmode (see INSN_REGMODE_WEIGHT below).  */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
   insn.  */
128 /* Provides the class number of the smallest class containing
   each hard register.  */
/* NOTE(review): the opening brace, the final entries of this initializer
   and the closing brace are not visible in this listing.  */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
/* Writable copies of the register-name tables; initial contents come from
   the SH_*_NAMES_INITIALIZER macros.  NOTE(review): presumably writable so
   option handling can patch names at run time -- confirm in sh.h.  */
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
/* NOTE(review): the opening and closing braces of this initializer are
   not visible in this listing.  */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
/* Selects between assembler syntax variants; tested via ASSEMBLER_DIALECT
   (see the "/s" vs ".s" choice in print_operand below).  */
196 int assembler_dialect;
/* Whether SH5 prologue code has reserved stack space for saving target
   (branch) registers.  */
198 static bool shmedia_space_reserved_for_target_registers;
/* Forward declarations of file-local helpers and of the functions that
   implement the target hooks installed in the macro table below.  */
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
/* Attribute handlers for interrupt_handler, sp_switch, trap_exit and
   renesas attributes.  */
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
/* Scheduler hooks (see the register-pressure discussion below).  */
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_use_dfa_interface (void);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
247 static bool sh_function_ok_for_sibcall (tree, tree);
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
/* Cost helpers used by sh_rtx_costs.  */
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
/* SH5 target-register stack-space bookkeeping.  */
269 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
270 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
271 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
272 static int scavenge_reg (HARD_REG_SET *s);
273 struct save_schedule_s;
274 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
275 struct save_schedule_s *, int);
/* Calling-convention and varargs hooks.  */
277 static rtx sh_struct_value_rtx (tree, int);
278 static bool sh_return_in_memory (tree, tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
281 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
282 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
283 static tree sh_build_builtin_va_list (void);
284 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
287 /* Initialize the GCC target structure. */
288 #undef TARGET_ATTRIBUTE_TABLE
289 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
291 /* The next two are used for debug info when compiling with -gdwarf. */
292 #undef TARGET_ASM_UNALIGNED_HI_OP
293 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
294 #undef TARGET_ASM_UNALIGNED_SI_OP
295 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
297 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
298 #undef TARGET_ASM_UNALIGNED_DI_OP
299 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
300 #undef TARGET_ASM_ALIGNED_DI_OP
301 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
303 #undef TARGET_ASM_FUNCTION_EPILOGUE
304 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
306 #undef TARGET_ASM_OUTPUT_MI_THUNK
307 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
309 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
310 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
312 #undef TARGET_ASM_FILE_START
313 #define TARGET_ASM_FILE_START sh_file_start
314 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
315 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
317 #undef TARGET_INSERT_ATTRIBUTES
318 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
320 #undef TARGET_SCHED_ADJUST_COST
321 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
323 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
324 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
/* NOTE(review): the hook value on the original continuation line is
   missing from this listing -- restore it from the full file.  */
326 #undef TARGET_SCHED_ISSUE_RATE
327 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
329 /* The next 5 hooks have been implemented for reenabling sched1. With the
330 help of these macros we are limiting the movement of insns in sched1 to
331 reduce the register pressure. The overall idea is to keep count of SImode
332 and SFmode regs required by already scheduled insns. When these counts
333 cross some threshold values; give priority to insns that free registers.
334 The insn that frees registers is most likely to be the insn with lowest
335 LUID (original insn order); but such an insn might be there in the stalled
336 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
337 upto a max of 8 cycles so that such insns may move from Q -> R.
339 The description of the hooks are as below:
341 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
342 scheduler; it is called inside the sched_init function just after
343 find_insn_reg_weights function call. It is used to calculate the SImode
344 and SFmode weights of insns of basic blocks; much similar to what
345 find_insn_reg_weights does.
346 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
348 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
349 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
352 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
353 high; reorder the ready queue so that the insn with lowest LUID will be
356 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
357 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
359 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
360 can be returned from TARGET_SCHED_REORDER2.
362 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
364 #undef TARGET_SCHED_DFA_NEW_CYCLE
365 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
367 #undef TARGET_SCHED_INIT_GLOBAL
368 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
370 #undef TARGET_SCHED_FINISH_GLOBAL
371 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
373 #undef TARGET_SCHED_VARIABLE_ISSUE
374 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
376 #undef TARGET_SCHED_REORDER
377 #define TARGET_SCHED_REORDER sh_reorder
379 #undef TARGET_SCHED_REORDER2
380 #define TARGET_SCHED_REORDER2 sh_reorder2
382 #undef TARGET_SCHED_INIT
383 #define TARGET_SCHED_INIT sh_md_init
385 #undef TARGET_CANNOT_MODIFY_JUMPS_P
386 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
387 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
388 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
389 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
390 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
391 sh_optimize_target_register_callee_saved
393 #undef TARGET_MS_BITFIELD_LAYOUT_P
394 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
396 #undef TARGET_INIT_BUILTINS
397 #define TARGET_INIT_BUILTINS sh_init_builtins
398 #undef TARGET_EXPAND_BUILTIN
399 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
401 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
402 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
404 #undef TARGET_CANNOT_COPY_INSN_P
405 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
406 #undef TARGET_RTX_COSTS
407 #define TARGET_RTX_COSTS sh_rtx_costs
408 #undef TARGET_ADDRESS_COST
409 #define TARGET_ADDRESS_COST sh_address_cost
411 #undef TARGET_MACHINE_DEPENDENT_REORG
412 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
415 #undef TARGET_HAVE_TLS
416 #define TARGET_HAVE_TLS true
419 #undef TARGET_PROMOTE_PROTOTYPES
420 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_ARGS
422 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_RETURN
424 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
426 #undef TARGET_STRUCT_VALUE_RTX
427 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
428 #undef TARGET_RETURN_IN_MEMORY
429 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
431 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
432 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
433 #undef TARGET_SETUP_INCOMING_VARARGS
434 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
435 #undef TARGET_STRICT_ARGUMENT_NAMING
436 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
437 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
438 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
440 #undef TARGET_BUILD_BUILTIN_VA_LIST
441 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
442 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
443 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445 #undef TARGET_PCH_VALID_P
446 #define TARGET_PCH_VALID_P sh_pch_valid_p
448 /* Return regmode weight for insn. */
449 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
451 /* Return current register pressure for regmode. */
452 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
/* The target hook vector itself; TARGET_INITIALIZER picks up all the
   overrides defined above.  */
454 struct gcc_target targetm = TARGET_INITIALIZER;
456 /* Print the operand address in x to the stream. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces
   and several case labels of this function are missing.  */
459 print_operand_address (FILE *stream, rtx x)
461 switch (GET_CODE (x))
/* Plain register: @Rn.  */
465 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
470 rtx base = XEXP (x, 0);
471 rtx index = XEXP (x, 1);
473 switch (GET_CODE (index))
/* Register + constant displacement: @(d,Rn).  */
476 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
477 reg_names[true_regnum (base)]);
483 int base_num = true_regnum (base);
484 int index_num = true_regnum (index);
/* Register + register: printed as @(r0,Rn); MAX picks the
   higher-numbered register, i.e. the one that is not r0.  */
486 fprintf (stream, "@(r0,%s)",
487 reg_names[MAX (base_num, index_num)]);
/* Pre-decrement: @-Rn.  */
499 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
/* Post-increment: @Rn+.  */
503 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
/* Anything else is a symbolic address; record constant-pool use first.  */
507 x = mark_constant_pool_use (x);
508 output_addr_const (stream, x);
513 /* Print operand x (an rtx) in assembler syntax to file stream
514 according to modifier code.
516 '.' print a .s if insn needs delay slot
517 ',' print LOCAL_LABEL_PREFIX
518 '@' print trap, rte or rts depending upon pragma interruptness
519 '#' output a nop if there is nothing to put in the delay slot
520 ''' print likelihood suffix (/u for unlikely).
521 'O' print a constant without the #
522 'R' print the LSW of a dp value - changes if in little endian
523 'S' print the MSW of a dp value - changes if in little endian
524 'T' print the next word of a dp value - same as 'R' in big endian mode.
525 'M' print an `x' if `m' will print `base,index'.
526 'N' print 'r63' if the operand is (const_int 0).
527 'm' print a pair `base,offset' or `base,index', for LD and ST.
528 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
529 'o' output an operator. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces
   and the case labels for the modifier codes are missing; the comments
   below map the visible fragments to the codes documented above.  */
532 print_operand (FILE *stream, rtx x, int code)
/* '.' -- delay-slot suffix, "/s" or ".s" depending on assembler dialect.  */
538 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
539 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
540 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
/* ',' -- local label prefix.  */
543 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* '@' -- function-exit insn: trapa for trap_exit, rte for interrupt
   handlers, rts otherwise.  */
547 fprintf (stream, "trapa #%d", trap_exit);
548 else if (sh_cfun_interrupt_handler_p ())
549 fprintf (stream, "rte");
551 fprintf (stream, "rts");
/* '#' -- fill an empty delay slot.  */
554 /* Output a nop if there's nothing in the delay slot. */
555 if (dbr_sequence_length () == 0)
556 fprintf (stream, "\n\tnop");
/* ''' -- branch-likelihood suffix from the REG_BR_PROB note.  */
560 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
562 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
563 fputs ("/u", stream);
/* 'O' -- constant without the leading '#'.  */
567 x = mark_constant_pool_use (x);
568 output_addr_const (stream, x);
/* 'R' / 'S' -- least/most significant word of a double-word value.  */
571 fputs (reg_names[REGNO (x) + LSW], (stream));
574 fputs (reg_names[REGNO (x) + MSW], (stream));
/* 'T' -- second word of a double-word value.  */
577 /* Next word of a double. */
578 switch (GET_CODE (x))
581 fputs (reg_names[REGNO (x) + 1], (stream));
584 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
585 && GET_CODE (XEXP (x, 0)) != POST_INC)
586 x = adjust_address (x, SImode, 4);
587 print_operand_address (stream, XEXP (x, 0));
/* 'o' -- mnemonic fragment for an arithmetic/comparison operator.  */
594 switch (GET_CODE (x))
596 case PLUS: fputs ("add", stream); break;
597 case MINUS: fputs ("sub", stream); break;
598 case MULT: fputs ("mul", stream); break;
599 case DIV: fputs ("div", stream); break;
600 case EQ: fputs ("eq", stream); break;
601 case NE: fputs ("ne", stream); break;
602 case GT: case LT: fputs ("gt", stream); break;
603 case GE: case LE: fputs ("ge", stream); break;
604 case GTU: case LTU: fputs ("gtu", stream); break;
605 case GEU: case LEU: fputs ("geu", stream); break;
/* 'M' -- emit "x" when 'm' would print a base,index pair.  */
611 if (GET_CODE (x) == MEM
612 && GET_CODE (XEXP (x, 0)) == PLUS
613 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
614 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
/* 'm' -- base,offset or base,index pair for LD/ST.  */
619 if (GET_CODE (x) != MEM)
622 switch (GET_CODE (x))
626 print_operand (stream, x, 0);
627 fputs (", 0", stream);
631 print_operand (stream, XEXP (x, 0), 0);
632 fputs (", ", stream);
633 print_operand (stream, XEXP (x, 1), 0);
/* 'N' -- SHmedia zero register r63 for a zero constant.  */
642 if (x == CONST0_RTX (GET_MODE (x)))
644 fprintf ((stream), "r63");
/* 'u' -- low 16 bits of a CONST_INT, unsigned.  */
649 if (GET_CODE (x) == CONST_INT)
651 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
/* Default: print the operand itself.  */
658 switch (GET_CODE (x))
660 /* FIXME: We need this on SHmedia32 because reload generates
661 some sign-extended HI or QI loads into DImode registers
662 but, because Pmode is SImode, the address ends up with a
663 subreg:SI of the DImode register. Maybe reload should be
664 fixed so as to apply alter_subreg to such loads? */
666 if (SUBREG_BYTE (x) != 0
667 || GET_CODE (SUBREG_REG (x)) != REG)
/* Registers: FP registers get mtrx/fv/fp/d prefixes depending on the
   vector/double mode; everything else uses the plain name.  */
674 if (FP_REGISTER_P (REGNO (x))
675 && GET_MODE (x) == V16SFmode)
676 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
677 else if (FP_REGISTER_P (REGNO (x))
678 && GET_MODE (x) == V4SFmode)
679 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
680 else if (GET_CODE (x) == REG
681 && GET_MODE (x) == V2SFmode)
682 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
683 else if (FP_REGISTER_P (REGNO (x))
684 && GET_MODE_SIZE (GET_MODE (x)) > 4)
685 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
687 fputs (reg_names[REGNO (x)], (stream));
691 output_address (XEXP (x, 0));
/* Truncated/sign-extended HImode datalabel expression (SHmedia):
   printed as "((addr >> n) & 65535)".  */
696 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
697 && GET_MODE (XEXP (x, 0)) == DImode
698 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
699 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode
701 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
704 if (GET_CODE (val) == ASHIFTRT)
707 if (GET_CODE (XEXP (val, 0)) == CONST)
709 output_addr_const (stream, XEXP (val, 0));
710 if (GET_CODE (XEXP (val, 0)) == CONST)
712 fputs (" >> ", stream);
713 output_addr_const (stream, XEXP (val, 1));
718 if (GET_CODE (val) == CONST)
720 output_addr_const (stream, val);
721 if (GET_CODE (val) == CONST)
724 fputs (" & 65535)", stream);
732 output_addr_const (stream, x);
739 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
/* NOTE(review): the return-type line and braces are missing from this
   listing.  */
741 force_into (rtx value, rtx target)
743 value = force_operand (value, target);
/* force_operand may have picked a different register; copy if so.  */
744 if (! rtx_equal_p (value, target))
745 emit_insn (gen_move_insn (target, value))
748 /* Emit code to perform a block move. Choose the best method.
750 OPERANDS[0] is the destination.
751 OPERANDS[1] is the source.
752 OPERANDS[2] is the size.
753 OPERANDS[3] is the alignment safe to use. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces
   and several statements of this function are missing.  All strategies
   call a libfunc with dst in r4 and src in r5 (r6 = size control where
   used).  */
756 expand_block_move (rtx *operands)
758 int align = INTVAL (operands[3]);
759 int constp = (GET_CODE (operands[2]) == CONST_INT);
760 int bytes = (constp ? INTVAL (operands[2]) : 0);
762 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
763 alignment, or if it isn't a multiple of 4 bytes, then fail. */
764 if (! constp || align < 4 || (bytes % 4 != 0))
/* 12-byte move: dedicated __movmemSI12_i4 helper.  */
771 else if (bytes == 12)
776 rtx r4 = gen_rtx_REG (SImode, 4);
777 rtx r5 = gen_rtx_REG (SImode, 5);
779 entry_name = get_identifier ("__movmemSI12_i4");
781 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
782 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
783 force_into (XEXP (operands[0], 0), r4);
784 force_into (XEXP (operands[1], 0), r5);
785 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
/* Larger moves (unless optimizing for size): odd/even dword variants of
   the __movmem_i4 helpers, with the dword count in r6.  */
788 else if (! TARGET_SMALLCODE)
794 rtx r4 = gen_rtx_REG (SImode, 4);
795 rtx r5 = gen_rtx_REG (SImode, 5);
796 rtx r6 = gen_rtx_REG (SImode, 6);
798 entry_name = get_identifier (bytes & 4
800 : "__movmem_i4_even");
801 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
802 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
803 force_into (XEXP (operands[0], 0), r4);
804 force_into (XEXP (operands[1], 0), r5);
807 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
808 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
/* Small fixed sizes: a per-size __movmemSI<n> entry point.  */
820 rtx r4 = gen_rtx_REG (SImode, 4);
821 rtx r5 = gen_rtx_REG (SImode, 5);
823 sprintf (entry, "__movmemSI%d", bytes);
824 entry_name = get_identifier (entry);
825 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
826 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
827 force_into (XEXP (operands[0], 0), r4);
828 force_into (XEXP (operands[1], 0), r5);
829 emit_insn (gen_block_move_real (func_addr_rtx));
833 /* This is the same number of bytes as a memcpy call, but to a different
834 less common function name, so this will occasionally use more space. */
835 if (! TARGET_SMALLCODE)
840 int final_switch, while_loop;
841 rtx r4 = gen_rtx_REG (SImode, 4);
842 rtx r5 = gen_rtx_REG (SImode, 5);
843 rtx r6 = gen_rtx_REG (SImode, 6);
845 entry_name = get_identifier ("__movmem");
846 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
847 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
848 force_into (XEXP (operands[0], 0), r4);
849 force_into (XEXP (operands[1], 0), r5);
851 /* r6 controls the size of the move. 16 is decremented from it
852 for each 64 bytes moved. Then the negative bit left over is used
853 as an index into a list of move instructions. e.g., a 72 byte move
854 would be set up with size(r6) = 14, for one iteration through the
855 big while loop, and a switch of -2 for the last part. */
857 final_switch = 16 - ((bytes / 4) % 16);
858 while_loop = ((bytes / 4) / 16 - 1) * 16;
859 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
860 emit_insn (gen_block_lump_real (func_addr_rtx));
867 /* Prepare operands for a move define_expand; specifically, one of the
868 operands must be in a register. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces,
   switch header and several statements are missing.  The visible logic:
   legitimize PIC/symbolic sources, force one operand into a register when
   neither is, and expand TLS references per their tls_model.  */
871 prepare_move_operands (rtx operands[], enum machine_mode mode)
873 if ((mode == SImode || mode == DImode)
875 && ! ((mode == Pmode || mode == ptr_mode)
876 && tls_symbolic_operand (operands[1], Pmode) != 0))
/* PIC: symbolic constants must go through legitimize_pic_address.  */
879 if (SYMBOLIC_CONST_P (operands[1]))
881 if (GET_CODE (operands[0]) == MEM)
882 operands[1] = force_reg (Pmode, operands[1]);
883 else if (TARGET_SHMEDIA
884 && GET_CODE (operands[1]) == LABEL_REF
885 && target_reg_operand (operands[0], mode))
/* Reuse operands[0] as scratch when no new pseudos may be created.  */
889 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
890 operands[1] = legitimize_pic_address (operands[1], mode, temp);
/* (const (plus (symbol) (int))): legitimize the symbol part, then add
   the offset back.  */
893 else if (GET_CODE (operands[1]) == CONST
894 && GET_CODE (XEXP (operands[1], 0)) == PLUS
895 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
897 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
898 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
900 operands[1] = expand_binop (mode, add_optab, temp,
901 XEXP (XEXP (operands[1], 0), 1),
902 no_new_pseudos ? temp
903 : gen_reg_rtx (Pmode),
908 if (! reload_in_progress && ! reload_completed)
910 /* Copy the source to a register if both operands aren't registers. */
911 if (! register_operand (operands[0], mode)
912 && ! sh_register_operand (operands[1], mode))
913 operands[1] = copy_to_mode_reg (mode, operands[1]);
915 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
917 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
918 except that we can't use that function because it is static. */
919 rtx new = change_address (operands[0], mode, 0);
920 MEM_COPY_ATTRIBUTES (new, operands[0]);
924 /* This case can happen while generating code to move the result
925 of a library call to the target. Reject `st r0,@(rX,rY)' because
926 reload will fail to find a spill register for rX, since r0 is already
927 being used for the source. */
928 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
929 && GET_CODE (operands[0]) == MEM
930 && GET_CODE (XEXP (operands[0], 0)) == PLUS
931 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
932 operands[1] = copy_to_mode_reg (mode, operands[1]);
/* TLS: expand the source according to its access model.  */
935 if (mode == Pmode || mode == ptr_mode)
938 enum tls_model tls_kind;
942 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
944 rtx tga_op1, tga_ret, tmp, tmp2;
/* Global dynamic: call __tls_get_addr; result in r0.  */
949 case TLS_MODEL_GLOBAL_DYNAMIC:
950 tga_ret = gen_rtx_REG (Pmode, R0_REG);
951 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
/* Local dynamic: module base via the call, then add the DTPOFF.  */
955 case TLS_MODEL_LOCAL_DYNAMIC:
956 tga_ret = gen_rtx_REG (Pmode, R0_REG);
957 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
959 tmp = gen_reg_rtx (Pmode);
960 emit_move_insn (tmp, tga_ret);
962 if (register_operand (op0, Pmode))
965 tmp2 = gen_reg_rtx (Pmode);
967 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
/* Initial exec: load the GOT TPOFF entry.  */
971 case TLS_MODEL_INITIAL_EXEC:
973 emit_insn (gen_GOTaddr2picreg ());
974 tga_op1 = gen_reg_rtx (Pmode);
975 tmp = gen_sym2GOTTPOFF (op1);
976 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
/* Local exec: gbr + TPOFF computed directly.  */
980 case TLS_MODEL_LOCAL_EXEC:
981 tmp2 = gen_reg_rtx (Pmode);
982 emit_insn (gen_load_gbr (tmp2));
983 tmp = gen_reg_rtx (Pmode);
984 emit_insn (gen_symTPOFF2reg (tmp, op1));
985 RTX_UNCHANGING_P (tmp) = 1;
987 if (register_operand (op0, Pmode))
990 op1 = gen_reg_rtx (Pmode);
992 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1005 /* Prepare the operands for an scc instruction; make sure that the
1006 compare has been done. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces
   and the code-canonicalization switch are missing.  Emits a compare that
   sets the T bit for CODE applied to sh_compare_op0/op1.  */
1008 prepare_scc_operands (enum rtx_code code)
1010 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1011 enum rtx_code oldcode = code;
1012 enum machine_mode mode;
1014 /* First need a compare insn. */
1018 /* It isn't possible to handle this case. */
/* If canonicalization reversed the code, swap the operands to match.  */
1035 if (code != oldcode)
1037 rtx tmp = sh_compare_op0;
1038 sh_compare_op0 = sh_compare_op1;
1039 sh_compare_op1 = tmp;
1042 mode = GET_MODE (sh_compare_op0);
1043 if (mode == VOIDmode)
1044 mode = GET_MODE (sh_compare_op1);
1046 sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* Only EQ/NE against zero can take a constant second operand.  */
1047 if ((code != EQ && code != NE
1048 && (sh_compare_op1 != const0_rtx
1049 || code == GTU || code == GEU || code == LTU || code == LEU))
1050 || (mode == DImode && sh_compare_op1 != const0_rtx)
1051 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1052 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 FP compares also USE fpscr, so wrap the SET in a PARALLEL.  */
1054 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1055 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1056 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1057 gen_rtx_SET (VOIDmode, t_reg,
1058 gen_rtx_fmt_ee (code, SImode,
1059 sh_compare_op0, sh_compare_op1)),
1060 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1062 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1063 gen_rtx_fmt_ee (code, SImode,
1064 sh_compare_op0, sh_compare_op1)));
1069 /* Called from the md file, set up the operands of a compare instruction. */
/* NOTE(review): this listing is incomplete -- the return-type line, braces
   and some statements are missing.  */
1072 from_compare (rtx *operands, int code)
1074 enum machine_mode mode = GET_MODE (sh_compare_op0);
1076 if (mode == VOIDmode)
1077 mode = GET_MODE (sh_compare_op1);
1080 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1082 /* Force args into regs, since we can't use constants here. */
1083 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1084 if (sh_compare_op1 != const0_rtx
1085 || code == GTU || code == GEU
1086 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1087 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH2E FP has no GE compare: synthesize it as GT followed by an IEEE
   equality compare.  */
1089 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1091 from_compare (operands, GT);
1092 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1095 insn = gen_rtx_SET (VOIDmode,
1096 gen_rtx_REG (SImode, T_REG),
1097 gen_rtx_fmt_ee (code, SImode,
1098 sh_compare_op0, sh_compare_op1));
/* SH4 FP compares also USE fpscr (see prepare_scc_operands).  */
1099 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1101 insn = gen_rtx_PARALLEL (VOIDmode,
1103 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1104 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1110 /* Functions to output assembly code. */
1112 /* Return a sequence of instructions to perform DI or DF move.
1114 Since the SH cannot move a DI or DF in one instruction, we have
1115 to take care when we see overlapping source and dest registers. */
/* Returns a two-instruction assembler template string; %S/%R/%T are
   SH-specific operand modifiers selecting most/least-significant and
   "other" halves of a register pair.
   NOTE(review): numbering gaps show elided lines (braces, an abort or
   fallthrough path in the PLUS case); read with care.  */
1118 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1119 enum machine_mode mode)
1121 rtx dst = operands[0];
1122 rtx src = operands[1];
/* Push onto a pre-decrement address: store high word first so the
   pair ends up contiguous.  */
1124 if (GET_CODE (dst) == MEM
1125 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1126 return "mov.l %T1,%0\n\tmov.l %1,%0";
1128 if (register_operand (dst, mode)
1129 && register_operand (src, mode))
1131 if (REGNO (src) == MACH_REG)
1132 return "sts mach,%S0\n\tsts macl,%R0";
1134 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1135 when mov.d r1,r0 do r1->r0 then r2->r1. */
/* Order the two moves so an overlapping pair is not clobbered.  */
1137 if (REGNO (src) + 1 == REGNO (dst))
1138 return "mov %T1,%T0\n\tmov %1,%0";
1140 return "mov %1,%0\n\tmov %T1,%T0";
1142 else if (GET_CODE (src) == CONST_INT)
/* Small constant: sign-extend into the high word by hand.  */
1144 if (INTVAL (src) < 0)
1145 output_asm_insn ("mov #-1,%S0", operands);
1147 output_asm_insn ("mov #0,%S0", operands);
1149 return "mov %1,%R0";
1151 else if (GET_CODE (src) == MEM)
1154 int dreg = REGNO (dst);
1155 rtx inside = XEXP (src, 0);
/* Find the base pointer register of the memory address so we can
   detect overlap with the destination pair below.  */
1157 if (GET_CODE (inside) == REG)
1158 ptrreg = REGNO (inside);
1159 else if (GET_CODE (inside) == SUBREG)
1160 ptrreg = subreg_regno (inside);
1161 else if (GET_CODE (inside) == PLUS)
1163 ptrreg = REGNO (XEXP (inside, 0));
1164 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1165 an offsettable address. Unfortunately, offsettable addresses use
1166 QImode to check the offset, and a QImode offsettable address
1167 requires r0 for the other operand, which is not currently
1168 supported, so we can't use the 'o' constraint.
1169 Thus we must check for and handle r0+REG addresses here.
1170 We punt for now, since this is likely very rare. */
1171 if (GET_CODE (XEXP (inside, 1)) == REG)
1174 else if (GET_CODE (inside) == LABEL_REF)
1175 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1176 else if (GET_CODE (inside) == POST_INC)
1177 return "mov.l %1,%0\n\tmov.l %1,%T0";
1181 /* Work out the safe way to copy. Copy into the second half first. */
1183 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1186 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1189 /* Print an instruction which would have gone into a delay slot after
1190 another instruction, but couldn't because the other instruction expanded
1191 into a sequence where putting the slot insn at the end wouldn't work. */
1194 print_slot (rtx insn)
/* Emit element 1 of the SEQUENCE (the delay-slot insn) now, ...  */
1196 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
/* ... then mark it deleted so final does not emit it a second time.  */
1198 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output a jump whose target is out of range of a conditional branch:
   materialize the target address (PIC-relative when flag_pic) into a
   scratch register and jump through it.
   NOTE(review): numbering gaps show elided lines (declarations of
   `jump`, `far`, `prev`, and several braces/conditions); the visible
   structure below is partial.  */
1202 output_far_jump (rtx insn, rtx op)
1204 struct { rtx lab, reg, op; } this;
1205 rtx braf_base_lab = NULL_RTX;
1208 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1211 this.lab = gen_label_rtx ();
/* Short-displacement variant: a 16-bit constant reaches the target.  */
1215 && offset - get_attr_length (insn) <= 32766)
1218 jump = "mov.w %O0,%1; braf %1";
1226 jump = "mov.l %O0,%1; braf %1";
1228 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1231 jump = "mov.l %O0,%1; jmp @%1";
1233 /* If we have a scratch register available, use it. */
1234 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1235 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1237 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1238 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1239 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1240 output_asm_insn (jump, &this.lab);
1241 if (dbr_sequence_length ())
1242 print_slot (final_sequence);
1244 output_asm_insn ("nop", 0);
1248 /* Output the delay slot insn first if any. */
1249 if (dbr_sequence_length ())
1250 print_slot (final_sequence);
1252 this.reg = gen_rtx_REG (SImode, 13);
1253 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1254 Fortunately, MACL is fixed and call-clobbered, and we never
1255 need its value across jumps, so save r13 in it instead of in
1258 output_asm_insn ("lds r13, macl", 0);
1260 output_asm_insn ("mov.l r13,@-r15", 0);
1261 output_asm_insn (jump, &this.lab);
1263 output_asm_insn ("sts macl, r13", 0);
1265 output_asm_insn ("mov.l @r15+,r13", 0);
/* For PIC on SH2, braf is relative to this label; emit it so the
   .long below can encode target - base.  */
1267 if (far && flag_pic && TARGET_SH2)
1269 braf_base_lab = gen_label_rtx ();
1270 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1271 CODE_LABEL_NUMBER (braf_base_lab));
1274 output_asm_insn (".align 2", 0);
1275 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1277 if (far && flag_pic)
1280 this.lab = braf_base_lab;
1281 output_asm_insn (".long %O2-%O0", &this.lab);
1284 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1288 /* Local label counter, used for constants in the pool and inside
1289 pattern branches. */
/* Starts at 100 so generated LF labels don't collide with low-numbered
   labels emitted elsewhere -- presumably; confirm against label users.  */
1291 static int lf = 100;
1293 /* Output code for ordinary branches. */
/* Emits a conditional branch (bt/bf), inverting it around an
   unconditional bra when the displacement is too large for the
   short form.  `logic` selects the branch sense.
   NOTE(review): elided lines include the `label` declaration, the
   switch case labels, and the abort on over-long branches.  */
1296 output_branch (int logic, rtx insn, rtx *operands)
1298 switch (get_attr_length (insn))
1301 /* This can happen if filling the delay slot has caused a forward
1302 branch to exceed its range (we could reverse it, but only
1303 when we know we won't overextend other branches; this should
1304 best be handled by relaxation).
1305 It can also happen when other condbranches hoist delay slot insn
1306 from their destination, thus leading to code size increase.
1307 But the branch will still be in the range -4092..+4098 bytes. */
1312 /* The call to print_slot will clobber the operands. */
1313 rtx op0 = operands[0];
1315 /* If the instruction in the delay slot is annulled (true), then
1316 there is no delay slot where we can put it now. The only safe
1317 place for it is after the label. final will do that by default. */
1320 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Inverted short branch over an unconditional bra to the real target.  */
1322 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1323 ASSEMBLER_DIALECT ? "/" : ".", label);
1324 print_slot (final_sequence);
1327 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1329 output_asm_insn ("bra\t%l0", &op0);
1330 fprintf (asm_out_file, "\tnop\n");
1331 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1335 /* When relaxing, handle this like a short branch. The linker
1336 will fix it up if it still doesn't fit after relaxation. */
1338 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1340 /* These are for SH2e, in which we have to account for the
1341 extra nop because of the hardware bug in annulled branches. */
1348 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1350 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1352 ASSEMBLER_DIALECT ? "/" : ".", label);
1353 fprintf (asm_out_file, "\tnop\n");
1354 output_asm_insn ("bra\t%l0", operands);
1355 fprintf (asm_out_file, "\tnop\n");
1356 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1360 /* When relaxing, fall through. */
1365 sprintf (buffer, "b%s%ss\t%%l0",
1367 ASSEMBLER_DIALECT ? "/" : ".");
1368 output_asm_insn (buffer, &operands[0]);
1373 /* There should be no longer branches now - that would
1374 indicate that something has destroyed the branches set
1375 up in machine_dependent_reorg. */
/* Output TEMPLATE (which contains a %l9 branch) and supply a label for
   operand 9: reuse the following conditional jump's target when it
   branches the wrong way or is close enough, otherwise emit a fresh
   label after INSN.  Returns the template string (via elided returns).
   NOTE(review): several returns/braces are in elided lines.  */
1381 output_branchy_insn (enum rtx_code code, const char *template,
1382 rtx insn, rtx *operands)
1384 rtx next_insn = NEXT_INSN (insn);
1386 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1388 rtx src = SET_SRC (PATTERN (next_insn));
1389 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1391 /* Following branch not taken */
1392 operands[9] = gen_label_rtx ();
1393 emit_label_after (operands[9], next_insn);
/* Record the new label's address so branch shortening stays valid.  */
1394 INSN_ADDRESSES_NEW (operands[9],
1395 INSN_ADDRESSES (INSN_UID (next_insn))
1396 + get_attr_length (next_insn));
1401 int offset = (branch_dest (next_insn)
1402 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
/* -252..258 is the reach of a short branch here.  */
1403 if (offset >= -252 && offset <= 258)
1405 if (GET_CODE (src) == IF_THEN_ELSE)
1407 src = XEXP (src, 1);
1413 operands[9] = gen_label_rtx ();
1414 emit_label_after (operands[9], insn);
1415 INSN_ADDRESSES_NEW (operands[9],
1416 INSN_ADDRESSES (INSN_UID (insn))
1417 + get_attr_length (insn));
/* Output an IEEE "compare-equal" used to complete a GE compare: skip
   the fcmp/eq when the T bit is already set from the preceding GT.  */
1422 output_ieee_ccmpeq (rtx insn, rtx *operands)
1424 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1427 /* Output the start of the assembler file. */
1430 sh_file_start (void)
1432 default_file_start ();
1435 /* We need to show the text section with the proper
1436 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1437 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1438 will complain. We can teach GAS specifically about the
1439 default attributes for our choice of text section, but
1440 then we would have to change GAS again if/when we change
1441 the text section name. */
1442 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1444 /* Switch to the data section so that the coffsem symbol
1445 isn't in the text section. */
/* Endianness directive for GAS.  */
1448 if (TARGET_LITTLE_ENDIAN)
1449 fputs ("\t.little\n", asm_out_file);
/* SH5 ISA/ABI selection directives.  */
1453 if (TARGET_SHCOMPACT)
1454 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1455 else if (TARGET_SHMEDIA)
1456 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1457 TARGET_SHMEDIA64 ? 64 : 32);
1461 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
/* Recursive walk over CONST/binary-op/UNSPEC structure; the case
   labels and return statements sit in elided lines.  */
1464 unspec_caller_rtx_p (rtx pat)
1466 switch (GET_CODE (pat))
1469 return unspec_caller_rtx_p (XEXP (pat, 0));
1472 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1474 return unspec_caller_rtx_p (XEXP (pat, 1));
1476 if (XINT (pat, 1) == UNSPEC_CALLER)
1485 /* Indicate that INSN cannot be duplicated. This is true for insn
1486 that generates an unique label. */
/* Target hook: only PIC SETs whose source contains UNSPEC_CALLER
   (a unique per-call label) are uncopyable; everything else returns
   false via elided early returns.  */
1489 sh_cannot_copy_insn_p (rtx insn)
1493 if (!reload_completed || !flag_pic)
1496 if (GET_CODE (insn) != INSN)
1498 if (asm_noperands (insn) >= 0)
1501 pat = PATTERN (insn);
1502 if (GET_CODE (pat) != SET)
1504 pat = SET_SRC (pat);
1506 if (unspec_caller_rtx_p (pat))
1512 /* Actual number of instructions used to make a shift by N. */
/* Indexed by shift count 0..31; 8 marks counts done via a library call.  */
1513 static const char ashiftrt_insns[] =
1514 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1516 /* Left shift and logical right shift are the same. */
1517 static const char shift_insns[] =
1518 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1520 /* Individual shift amounts needed to get the above length sequences.
1521 One bit right shifts clobber the T bit, so when possible, put one bit
1522 shifts in the middle of the sequence, so the ends are eligible for
1523 branch delay slots. */
/* Negative entries mean shift in the opposite direction (see
   gen_ashift); rows are zero-terminated implicitly by the {..., 0}
   aggregate padding.  */
1524 static const short shift_amounts[32][5] = {
1525 {0}, {1}, {2}, {2, 1},
1526 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1527 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1528 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1529 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1530 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1531 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1532 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1534 /* Likewise, but for shift amounts < 16, up to three highmost bits
1535 might be clobbered. This is typically used when combined with some
1536 kind of sign or zero extension. */
1538 static const char ext_shift_insns[] =
1539 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1541 static const short ext_shift_amounts[32][4] = {
1542 {0}, {1}, {2}, {2, 1},
1543 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1544 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1545 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1546 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1547 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1548 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1549 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1551 /* Assuming we have a value that has been sign-extended by at least one bit,
1552 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1553 to shift it by N without data loss, and quicker than by other means? */
/* True for n in {8, 9, ..., 15} union {n | (n|8)==15}, i.e. 7 and 15
   with any of the low three bits set -- concretely n==7 or 15.  */
1554 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1556 /* This is used in length attributes in sh.md to help compute the length
1557 of arbitrary constant shift instructions. */
/* Extract shift count and code from the insn's first SET and look the
   instruction count up in the tables above.  The switch labels sit in
   elided lines.  */
1560 shift_insns_rtx (rtx insn)
1562 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1563 int shift_count = INTVAL (XEXP (set_src, 1));
1564 enum rtx_code shift_code = GET_CODE (set_src);
1569 return ashiftrt_insns[shift_count];
1572 return shift_insns[shift_count];
1578 /* Return the cost of a shift. */
/* NOTE(review): the function signature and SHMEDIA early-outs are in
   elided lines; only the SH1-4 costing is fully visible.  */
1588 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
/* Only a 1-bit DImode shift has a direct multi-word pattern.  */
1590 if (GET_MODE (x) == DImode
1591 && GET_CODE (XEXP (x, 1)) == CONST_INT
1592 && INTVAL (XEXP (x, 1)) == 1)
1595 /* Everything else is invalid, because there is no pattern for it. */
1598 /* If shift by a non constant, then this will be expensive. */
1599 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1600 return SH_DYNAMIC_SHIFT_COST;
1602 value = INTVAL (XEXP (x, 1));
1604 /* Otherwise, return the true cost in instructions. */
1605 if (GET_CODE (x) == ASHIFTRT)
1607 int cost = ashiftrt_insns[value];
1608 /* If SH3, then we put the constant in a reg and use shad. */
1609 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1610 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1614 return shift_insns[value];
1617 /* Return the cost of an AND operation. */
/* NOTE(review): signature, SHMEDIA branch header, and some returns are
   in elided lines.  */
1624 /* Anding with a register is a single cycle and instruction. */
1625 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1628 i = INTVAL (XEXP (x, 1));
/* SHmedia: immediates fitting andi/and constraints are cheap.  */
1632 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1633 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1634 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1640 /* These constants are single cycle extu.[bw] instructions. */
1641 if (i == 0xff || i == 0xffff)
1643 /* Constants that can be used in an and immediate instruction in a single
1644 cycle, but this requires r0, so make it a little more expensive. */
1645 if (CONST_OK_FOR_K08 (i))
1647 /* Constants that can be loaded with a mov immediate and an and.
1648 This case is probably unnecessary. */
1649 if (CONST_OK_FOR_I08 (i))
1651 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1652 This case is probably unnecessary. */
1656 /* Return the cost of an addition or a subtraction. */
/* NOTE(review): signature and several returns in elided lines.  */
1661 /* Adding a register is a single cycle insn. */
1662 if (GET_CODE (XEXP (x, 1)) == REG
1663 || GET_CODE (XEXP (x, 1)) == SUBREG)
1666 /* Likewise for small constants. */
1667 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1668 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
/* SHmedia constant costs: cost grows with the number of 16-bit
   chunks needed to materialize the immediate.  */
1672 switch (GET_CODE (XEXP (x, 1)))
1677 return TARGET_SHMEDIA64 ? 5 : 3;
1680 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1682 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1684 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1692 /* Any other constant requires a 2 cycle pc-relative load plus an
1697 /* Return the cost of a multiply. */
1699 multcosts (rtx x ATTRIBUTE_UNUSED)
/* NOTE(review): SHMEDIA/SH2 branch headers and returns are elided.  */
1706 /* We have a mul insn, so we can never take more than the mul and the
1707 read of the mac reg, but count more because of the latency and extra
1709 if (TARGET_SMALLCODE)
1714 /* If we're aiming at small code, then just count the number of
1715 insns in a multiply call sequence. */
1716 if (TARGET_SMALLCODE)
1719 /* Otherwise count all the insns in the routine we'd be calling too. */
1723 /* Compute a (partial) cost for rtx X. Return true if the complete
1724 cost has been computed, and false if subexpressions should be
1725 scanned. In either case, *TOTAL contains the cost result. */
/* TARGET_RTX_COSTS hook.  Case labels (CONST_INT, CONST, MULT, etc.)
   sit in elided lines; each arm sets *TOTAL via COSTS_N_INSNS.  */
1728 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
/* SHmedia CONST_INT costing: size of immediate decides insn count.  */
1735 if (INTVAL (x) == 0)
1737 else if (outer_code == AND && and_operand ((x), DImode))
1739 else if ((outer_code == IOR || outer_code == XOR
1740 || outer_code == PLUS)
1741 && CONST_OK_FOR_I10 (INTVAL (x)))
1743 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1744 *total = COSTS_N_INSNS (outer_code != SET);
1745 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1746 *total = COSTS_N_INSNS (2);
1747 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1748 *total = COSTS_N_INSNS (3);
1750 *total = COSTS_N_INSNS (4);
/* Non-SHmedia CONST_INT costing.  */
1753 if (CONST_OK_FOR_I08 (INTVAL (x)))
1755 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1756 && CONST_OK_FOR_K08 (INTVAL (x)))
/* Symbolic constants / CONST_DOUBLE.  */
1765 if (TARGET_SHMEDIA64)
1766 *total = COSTS_N_INSNS (4);
1767 else if (TARGET_SHMEDIA32)
1768 *total = COSTS_N_INSNS (2);
1775 *total = COSTS_N_INSNS (4);
/* Delegate composite operations to the helpers above.  */
1781 *total = COSTS_N_INSNS (addsubcosts (x));
1785 *total = COSTS_N_INSNS (andcosts (x));
1789 *total = COSTS_N_INSNS (multcosts (x));
1795 *total = COSTS_N_INSNS (shiftcosts (x));
1802 *total = COSTS_N_INSNS (20);
1815 /* Compute the cost of an address. For the SH, all valid addresses are
1816 the same cost. Use a slightly higher cost for reg + reg addressing,
1817 since it increases pressure on r0. */
1820 sh_address_cost (rtx X)
/* reg+reg (non-constant index) on non-SHmedia costs 1, all else 0.  */
1822 return (GET_CODE (X) == PLUS
1823 && ! CONSTANT_P (XEXP (X, 1))
1824 && ! TARGET_SHMEDIA ? 1 : 0);
1827 /* Code to expand a shift. */
/* Emit one SImode shift insn of TYPE by N on REG.  The switch on TYPE
   and the negation handling for negative N are in elided lines.  */
1830 gen_ashift (int type, int n, rtx reg)
1832 /* Negative values here come from the shift_amounts array. */
1845 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1849 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1851 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1854 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1859 /* Same for HImode */
1862 gen_ashift_hi (int type, int n, rtx reg)
1864 /* Negative values here come from the shift_amounts array. */
1878 /* We don't have HImode right shift operations because using the
1879 ordinary 32 bit shift instructions for that doesn't generate proper
1880 zero/sign extension.
1881 gen_ashift_hi is only called in contexts where we know that the
1882 sign extension works out correctly. */
/* For right shifts, operate on the containing SImode register via a
   SUBREG and delegate to gen_ashift.  */
1885 if (GET_CODE (reg) == SUBREG)
1887 offset = SUBREG_BYTE (reg);
1888 reg = SUBREG_REG (reg);
1890 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1894 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1899 /* Output RTL to split a constant shift into its component SH constant
1900 shift instructions. */
1903 gen_shifty_op (int code, rtx *operands)
1905 int value = INTVAL (operands[2]);
1908 /* Truncate the shift count in case it is out of bounds. */
1909 value = value & 0x1f;
/* Shift by 31: special two-insn sequences instead of a long chain.
   The enclosing `if (value == 31)` test is in an elided line.  */
1913 if (code == LSHIFTRT)
1915 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1916 emit_insn (gen_movt (operands[0]));
1919 else if (code == ASHIFT)
1921 /* There is a two instruction sequence for 31 bit left shifts,
1922 but it requires r0. */
1923 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1925 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1926 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1931 else if (value == 0)
1933 /* This can happen when not optimizing. We must output something here
1934 to prevent the compiler from aborting in final.c after the try_split
1936 emit_insn (gen_nop ());
/* General case: walk the precomputed amount sequence.  */
1940 max = shift_insns[value];
1941 for (i = 0; i < max; i++)
1942 gen_ashift (code, shift_amounts[value][i], operands[0]);
1945 /* Same as above, but optimized for values where the topmost bits don't
1949 gen_shifty_hi_op (int code, rtx *operands)
1951 int value = INTVAL (operands[2]);
1953 void (*gen_fun) (int, int, rtx);
1955 /* This operation is used by and_shl for SImode values with a few
1956 high bits known to be cleared. */
/* value == 0 degenerates to a nop, as in gen_shifty_op.  */
1960 emit_insn (gen_nop ());
1964 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
/* ASHIFT walks forward; the branch header is in an elided line.  */
1967 max = ext_shift_insns[value];
1968 for (i = 0; i < max; i++)
1969 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1972 /* When shifting right, emit the shifts in reverse order, so that
1973 solitary negative values come first. */
1974 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1975 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1978 /* Output RTL for an arithmetic right shift. */
1980 /* ??? Rewrite to use super-optimizer sequences. */
/* Returns nonzero when it handled the shift itself (returns are in
   elided lines).  Dynamic-shift targets negate the count and use
   ashrsi3_d; otherwise short counts expand inline and long counts
   call the __ashiftrt_r4_N millicode helper in r4.  */
1983 expand_ashiftrt (rtx *operands)
/* TARGET_SH3 dynamic-shift path (enclosing test elided).  */
1993 if (GET_CODE (operands[2]) != CONST_INT)
1995 rtx count = copy_to_mode_reg (SImode, operands[2]);
1996 emit_insn (gen_negsi2 (count, count));
1997 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2000 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2001 > 1 + SH_DYNAMIC_SHIFT_COST)
2004 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2005 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2009 if (GET_CODE (operands[2]) != CONST_INT)
2012 value = INTVAL (operands[2]) & 31;
/* Shift by 31: dedicated pattern.  */
2016 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2019 else if (value >= 16 && value <= 19)
/* 16..19: shar16 then up to three single-bit shifts.  */
2021 wrk = gen_reg_rtx (SImode);
2022 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2025 gen_ashift (ASHIFTRT, 1, wrk);
2026 emit_move_insn (operands[0], wrk);
2029 /* Expand a short sequence inline, longer call a magic routine. */
2030 else if (value <= 5)
2032 wrk = gen_reg_rtx (SImode);
2033 emit_move_insn (wrk, operands[1]);
2035 gen_ashift (ASHIFTRT, 1, wrk);
2036 emit_move_insn (operands[0], wrk);
2040 wrk = gen_reg_rtx (Pmode);
2042 /* Load the value into an arg reg and call a helper. */
2043 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2044 sprintf (func, "__ashiftrt_r4_%d", value);
2045 func_name = get_identifier (func);
2046 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2047 emit_move_insn (wrk, sym);
2048 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2049 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* True when a constant shift COUNT is cheaper done as a dynamic shift
   (load count into a register plus one shad/shld) than inline.  */
2054 sh_dynamicalize_shift_p (rtx count)
2056 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2059 /* Try to find a good way to implement the combiner pattern
2060 [(set (match_operand:SI 0 "register_operand" "r")
2061 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2062 (match_operand:SI 2 "const_int_operand" "n"))
2063 (match_operand:SI 3 "const_int_operand" "n"))) .
2064 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2065 return 0 for simple right / left or left/right shift combination.
2066 return 1 for a combination of shifts with zero_extend.
2067 return 2 for a combination of shifts with an AND that needs r0.
2068 return 3 for a combination of shifts with an AND that needs an extra
2069 scratch register, when the three highmost bits of the AND mask are clear.
2070 return 4 for a combination of shifts with an AND that needs an extra
2071 scratch register, when any of the three highmost bits of the AND mask
2073 If ATTRP is set, store an initial right shift width in ATTRP[0],
2074 and the instruction length in ATTRP[1] . These values are not valid
2076 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2077 shift_amounts for the last shift value that is to be used before the
/* NOTE(review): the kind-tracking variable, several cost-update bodies
   and the final return are in elided lines; the visible code is the
   cost search only.  */
2080 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2082 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2083 int left = INTVAL (left_rtx), right;
2085 int cost, best_cost = 10000;
2086 int best_right = 0, best_len = 0;
2090 if (left < 0 || left > 31)
2092 if (GET_CODE (mask_rtx) == CONST_INT)
2093 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2095 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2096 /* Can this be expressed as a right shift / left shift pair? */
2097 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2098 right = exact_log2 (lsb);
2099 mask2 = ~(mask + lsb - 1);
2100 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2101 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2103 best_cost = shift_insns[right] + shift_insns[right + left];
2104 /* mask has no trailing zeroes <==> ! right */
2105 else if (! right && mask2 == ~(lsb2 - 1))
2107 int late_right = exact_log2 (lsb2);
2108 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2110 /* Try to use zero extend. */
2111 if (mask2 == ~(lsb2 - 1))
2115 for (width = 8; width <= 16; width += 8)
2117 /* Can we zero-extend right away? */
2118 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2121 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2122 if (cost < best_cost)
2133 /* ??? Could try to put zero extend into initial right shift,
2134 or even shift a bit left before the right shift. */
2135 /* Determine value of first part of left shift, to get to the
2136 zero extend cut-off point. */
2137 first = width - exact_log2 (lsb2) + right;
2138 if (first >= 0 && right + left - first >= 0)
2140 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2141 + ext_shift_insns[right + left - first];
2142 if (cost < best_cost)
2154 /* Try to use r0 AND pattern */
2155 for (i = 0; i <= 2; i++)
2159 if (! CONST_OK_FOR_K08 (mask >> i))
2161 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2162 if (cost < best_cost)
2167 best_len = cost - 1;
2170 /* Try to use a scratch register to hold the AND operand. */
2171 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2172 for (i = 0; i <= 2; i++)
2176 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2177 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2178 if (cost < best_cost)
2183 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
/* Report the chosen parameters back to the caller.  */
2189 attrp[0] = best_right;
2190 attrp[1] = best_len;
2195 /* This is used in length attributes of the unnamed instructions
2196 corresponding to shl_and_kind return values of 1 and 2. */
2198 shl_and_length (rtx insn)
2200 rtx set_src, left_rtx, mask_rtx;
/* attributes[3] declaration is presumably in an elided line.  */
2203 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2204 left_rtx = XEXP (XEXP (set_src, 0), 1);
2205 mask_rtx = XEXP (set_src, 1);
2206 shl_and_kind (left_rtx, mask_rtx, attributes);
/* attributes[1] holds the instruction length computed above.  */
2207 return attributes[1];
2210 /* This is used in length attribute of the and_shl_scratch instruction. */
2213 shl_and_scr_length (rtx insn)
2215 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
/* Sum the lengths of the three component shifts plus the AND.  */
2216 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2217 rtx op = XEXP (set_src, 0);
2218 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2219 op = XEXP (XEXP (op, 0), 0);
2220 return len + shift_insns[INTVAL (XEXP (op, 1))];
2223 /* Generating rtl? */
2224 extern int rtx_equal_function_value_matters;
2226 /* Generate rtl for instructions for which shl_and_kind advised a particular
2227 method of generating them, i.e. returned zero. */
/* NOTE(review): the switch on `kind`, its case labels, and several
   returns are in elided lines; each visible fragment below implements
   one of shl_and_kind's strategies.  */
2230 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2233 unsigned HOST_WIDE_INT mask;
2234 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2235 int right, total_shift;
2236 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2238 right = attributes[0];
2239 total_shift = INTVAL (left_rtx) + right;
2240 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
/* kind 1 strategy: right shift, partial left shift, zero-extend,
   remaining left shift.  */
2247 int first = attributes[2];
2252 emit_insn ((mask << right) <= 0xff
2253 ? gen_zero_extendqisi2 (dest,
2254 gen_lowpart (QImode, source))
2255 : gen_zero_extendhisi2 (dest,
2256 gen_lowpart (HImode, source)));
2260 emit_insn (gen_movsi (dest, source));
2264 operands[2] = GEN_INT (right);
2265 gen_shifty_hi_op (LSHIFTRT, operands);
2269 operands[2] = GEN_INT (first);
2270 gen_shifty_hi_op (ASHIFT, operands);
2271 total_shift -= first;
2275 emit_insn (mask <= 0xff
2276 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2277 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2278 if (total_shift > 0)
2280 operands[2] = GEN_INT (total_shift);
2281 gen_shifty_hi_op (ASHIFT, operands);
2286 shift_gen_fun = gen_shifty_op;
2288 /* If the topmost bit that matters is set, set the topmost bits
2289 that don't matter. This way, we might be able to get a shorter
2291 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2292 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2294 /* Don't expand fine-grained when combining, because that will
2295 make the pattern fail. */
2296 if (rtx_equal_function_value_matters
2297 || reload_in_progress || reload_completed)
2301 /* Cases 3 and 4 should be handled by this split
2302 only while combining */
2307 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2310 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2315 operands[2] = GEN_INT (total_shift);
2316 shift_gen_fun (ASHIFT, operands);
/* Scratch-register AND variant: compute the compensating negative
   shift amounts from the ext table.  */
2323 if (kind != 4 && total_shift < 16)
2325 neg = -ext_shift_amounts[total_shift][1];
2327 neg -= ext_shift_amounts[total_shift][2];
2331 emit_insn (gen_and_shl_scratch (dest, source,
2334 GEN_INT (total_shift + neg),
2336 emit_insn (gen_movsi (dest, dest));
2343 /* Try to find a good way to implement the combiner pattern
2344 [(set (match_operand:SI 0 "register_operand" "=r")
2345 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2346 (match_operand:SI 2 "const_int_operand" "n")
2347 (match_operand:SI 3 "const_int_operand" "n")
2349 (clobber (reg:SI T_REG))]
2350 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2351 return 0 for simple left / right shift combination.
2352 return 1 for left shift / 8 bit sign extend / left shift.
2353 return 2 for left shift / 16 bit sign extend / left shift.
2354 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2355 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2356 return 5 for left shift / 16 bit sign extend / right shift
2357 return 6 for < 8 bit sign extend / left shift.
2358 return 7 for < 8 bit sign extend / left shift / single right shift.
2359 If COSTP is nonzero, assign the calculated cost to *COSTP. */
/* NOTE(review): `kind` declaration, several kind assignments, and the
   final return sit in elided lines; the visible code is the cost
   search over the strategies enumerated above.  */
2362 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2364 int left, size, insize, ext;
2365 int cost = 0, best_cost;
2368 left = INTVAL (left_rtx);
2369 size = INTVAL (size_rtx);
2370 insize = size - left;
2373 /* Default to left / right shift. */
2375 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2378 /* 16 bit shift / sign extend / 16 bit shift */
2379 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2380 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2381 below, by alternative 3 or something even better. */
2382 if (cost < best_cost)
2388 /* Try a plain sign extend between two shifts. */
2389 for (ext = 16; ext >= insize; ext -= 8)
2393 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2394 if (cost < best_cost)
2396 kind = ext / (unsigned) 8;
2400 /* Check if we can do a sloppy shift with a final signed shift
2401 restoring the sign. */
2402 if (EXT_SHIFT_SIGNED (size - ext))
2403 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2404 /* If not, maybe it's still cheaper to do the second shift sloppy,
2405 and do a final sign extend? */
2406 else if (size <= 16)
2407 cost = ext_shift_insns[ext - insize] + 1
2408 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2411 if (cost < best_cost)
2413 kind = ext / (unsigned) 8 + 2;
2417 /* Check if we can sign extend in r0 */
2420 cost = 3 + shift_insns[left];
2421 if (cost < best_cost)
2426 /* Try the same with a final signed shift. */
2429 cost = 3 + ext_shift_insns[left + 1] + 1;
2430 if (cost < best_cost)
2439 /* Try to use a dynamic shift. */
2440 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2441 if (cost < best_cost)
2452 /* Function to be used in the length attribute of the instructions
2453 implementing this pattern. */
2456 shl_sext_length (rtx insn)
2458 rtx set_src, left_rtx, size_rtx;
/* `cost` declaration and the return of cost are in elided lines.  */
2461 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2462 left_rtx = XEXP (XEXP (set_src, 0), 1);
2463 size_rtx = XEXP (set_src, 1);
2464 shl_sext_kind (left_rtx, size_rtx, &cost);
2468 /* Generate rtl for this pattern */
2471 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2474 int left, size, insize, cost;
2477 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2478 left = INTVAL (left_rtx);
2479 size = INTVAL (size_rtx);
2480 insize = size - left;
2488 int ext = kind & 1 ? 8 : 16;
2489 int shift2 = size - ext;
2491 /* Don't expand fine-grained when combining, because that will
2492 make the pattern fail. */
2493 if (! rtx_equal_function_value_matters
2494 && ! reload_in_progress && ! reload_completed)
2496 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2497 emit_insn (gen_movsi (dest, source));
2501 emit_insn (gen_movsi (dest, source));
2505 operands[2] = GEN_INT (ext - insize);
2506 gen_shifty_hi_op (ASHIFT, operands);
2509 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2510 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2515 operands[2] = GEN_INT (shift2);
2516 gen_shifty_op (ASHIFT, operands);
2523 if (EXT_SHIFT_SIGNED (shift2))
2525 operands[2] = GEN_INT (shift2 + 1);
2526 gen_shifty_op (ASHIFT, operands);
2527 operands[2] = const1_rtx;
2528 gen_shifty_op (ASHIFTRT, operands);
2531 operands[2] = GEN_INT (shift2);
2532 gen_shifty_hi_op (ASHIFT, operands);
2536 operands[2] = GEN_INT (-shift2);
2537 gen_shifty_hi_op (LSHIFTRT, operands);
2539 emit_insn (size <= 8
2540 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2541 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2548 if (! rtx_equal_function_value_matters
2549 && ! reload_in_progress && ! reload_completed)
2550 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2554 operands[2] = GEN_INT (16 - insize);
2555 gen_shifty_hi_op (ASHIFT, operands);
2556 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2558 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2560 gen_ashift (ASHIFTRT, 1, dest);
2565 /* Don't expand fine-grained when combining, because that will
2566 make the pattern fail. */
2567 if (! rtx_equal_function_value_matters
2568 && ! reload_in_progress && ! reload_completed)
2570 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2571 emit_insn (gen_movsi (dest, source));
2574 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2575 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2576 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2578 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2579 gen_shifty_op (ASHIFT, operands);
2581 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2589 /* Prefix a symbol_ref name with "datalabel". */
2592 gen_datalabel_ref (rtx sym)
2594 if (GET_CODE (sym) == LABEL_REF)
2595 return gen_rtx_CONST (GET_MODE (sym),
2596 gen_rtx_UNSPEC (GET_MODE (sym),
2600 if (GET_CODE (sym) != SYMBOL_REF)
2607 /* The SH cannot load a large constant into a register, constants have to
2608 come from a pc relative load. The reference of a pc relative load
2609 instruction must be less than 1k in front of the instruction. This
2610 means that we often have to dump a constant inside a function, and
2611 generate code to branch around it.
2613 It is important to minimize this, since the branches will slow things
2614 down and make things bigger.
2616 Worst case code looks like:
2634 We fix this by performing a scan before scheduling, which notices which
2635 instructions need to have their operands fetched from the constant table
2636 and builds the table.
2640 scan, find an instruction which needs a pcrel move. Look forward, find the
2641 last barrier which is within MAX_COUNT bytes of the requirement.
2642 If there isn't one, make one. Process all the instructions between
2643 the find and the barrier.
2645 In the above example, we can tell that L3 is within 1k of L1, so
2646 the first move can be shrunk from the 3 insn+constant sequence into
2647 just 1 insn, and the constant moved to L3 to make:
2658 Then the second move becomes the target for the shortening process. */
2662 rtx value; /* Value in table. */
2663 rtx label; /* Label of value. */
2664 rtx wend; /* End of window. */
2665 enum machine_mode mode; /* Mode of value. */
2667 /* True if this constant is accessed as part of a post-increment
2668 sequence. Note that HImode constants are never accessed in this way. */
2669 bool part_of_sequence_p;
2672 /* The maximum number of constants that can fit into one pool, since
2673 the pc relative range is 0...1020 bytes and constants are at least 4
2676 #define MAX_POOL_SIZE (1020/4)
2677 static pool_node pool_vector[MAX_POOL_SIZE];
2678 static int pool_size;
2679 static rtx pool_window_label;
2680 static int pool_window_last;
2682 /* ??? If we need a constant in HImode which is the truncated value of a
2683 constant we need in SImode, we could combine the two entries thus saving
2684 two bytes. Is this common enough to be worth the effort of implementing
2687 /* ??? This stuff should be done at the same time that we shorten branches.
2688 As it is now, we must assume that all branches are the maximum size, and
2689 this causes us to almost always output constant pools sooner than
2692 /* Add a constant to the pool and return its label. */
2695 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2698 rtx lab, new, ref, newref;
2700 /* First see if we've already got it. */
2701 for (i = 0; i < pool_size; i++)
2703 if (x->code == pool_vector[i].value->code
2704 && mode == pool_vector[i].mode)
2706 if (x->code == CODE_LABEL)
2708 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2711 if (rtx_equal_p (x, pool_vector[i].value))
2716 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2718 new = gen_label_rtx ();
2719 LABEL_REFS (new) = pool_vector[i].label;
2720 pool_vector[i].label = lab = new;
2722 if (lab && pool_window_label)
2724 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2725 ref = pool_vector[pool_window_last].wend;
2726 LABEL_NEXTREF (newref) = ref;
2727 pool_vector[pool_window_last].wend = newref;
2730 pool_window_label = new;
2731 pool_window_last = i;
2737 /* Need a new one. */
2738 pool_vector[pool_size].value = x;
2739 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2742 pool_vector[pool_size - 1].part_of_sequence_p = true;
2745 lab = gen_label_rtx ();
2746 pool_vector[pool_size].mode = mode;
2747 pool_vector[pool_size].label = lab;
2748 pool_vector[pool_size].wend = NULL_RTX;
2749 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2750 if (lab && pool_window_label)
2752 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2753 ref = pool_vector[pool_window_last].wend;
2754 LABEL_NEXTREF (newref) = ref;
2755 pool_vector[pool_window_last].wend = newref;
2758 pool_window_label = lab;
2759 pool_window_last = pool_size;
2764 /* Output the literal table. START, if nonzero, is the first instruction
2765 this table is needed for, and also indicates that there is at least one
2766 casesi_worker_2 instruction; We have to emit the operand3 labels from
2767 these insns at a 4-byte aligned position. BARRIER is the barrier
2768 after which we are to place the table. */
2771 dump_table (rtx start, rtx barrier)
2779 /* Do two passes, first time dump out the HI sized constants. */
2781 for (i = 0; i < pool_size; i++)
2783 pool_node *p = &pool_vector[i];
2785 if (p->mode == HImode)
2789 scan = emit_insn_after (gen_align_2 (), scan);
2792 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2793 scan = emit_label_after (lab, scan);
2794 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2796 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2798 lab = XEXP (ref, 0);
2799 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2802 else if (p->mode == DFmode)
2810 scan = emit_insn_after (gen_align_4 (), scan);
2812 for (; start != barrier; start = NEXT_INSN (start))
2813 if (GET_CODE (start) == INSN
2814 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2816 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2817 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2819 scan = emit_label_after (lab, scan);
2822 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2824 rtx align_insn = NULL_RTX;
2826 scan = emit_label_after (gen_label_rtx (), scan);
2827 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2830 for (i = 0; i < pool_size; i++)
2832 pool_node *p = &pool_vector[i];
2840 if (align_insn && !p->part_of_sequence_p)
2842 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2843 emit_label_before (lab, align_insn);
2844 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2846 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2848 lab = XEXP (ref, 0);
2849 emit_insn_before (gen_consttable_window_end (lab),
2852 delete_insn (align_insn);
2853 align_insn = NULL_RTX;
2858 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2859 scan = emit_label_after (lab, scan);
2860 scan = emit_insn_after (gen_consttable_4 (p->value,
2862 need_align = ! need_align;
2868 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2873 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2874 scan = emit_label_after (lab, scan);
2875 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2883 if (p->mode != HImode)
2885 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2887 lab = XEXP (ref, 0);
2888 scan = emit_insn_after (gen_consttable_window_end (lab),
2897 for (i = 0; i < pool_size; i++)
2899 pool_node *p = &pool_vector[i];
2910 scan = emit_label_after (gen_label_rtx (), scan);
2911 scan = emit_insn_after (gen_align_4 (), scan);
2913 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2914 scan = emit_label_after (lab, scan);
2915 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2923 scan = emit_label_after (gen_label_rtx (), scan);
2924 scan = emit_insn_after (gen_align_4 (), scan);
2926 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2927 scan = emit_label_after (lab, scan);
2928 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2936 if (p->mode != HImode)
2938 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2940 lab = XEXP (ref, 0);
2941 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2946 scan = emit_insn_after (gen_consttable_end (), scan);
2947 scan = emit_barrier_after (scan);
2949 pool_window_label = NULL_RTX;
2950 pool_window_last = 0;
2953 /* Return nonzero if constant would be an ok source for a
2954 mov.w instead of a mov.l. */
2959 return (GET_CODE (src) == CONST_INT
2960 && INTVAL (src) >= -32768
2961 && INTVAL (src) <= 32767);
2964 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2966 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2967 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2968 need to fix it if the input value is CONST_OK_FOR_I08. */
2971 broken_move (rtx insn)
2973 if (GET_CODE (insn) == INSN)
2975 rtx pat = PATTERN (insn);
2976 if (GET_CODE (pat) == PARALLEL)
2977 pat = XVECEXP (pat, 0, 0);
2978 if (GET_CODE (pat) == SET
2979 /* We can load any 8 bit value if we don't care what the high
2980 order bits end up as. */
2981 && GET_MODE (SET_DEST (pat)) != QImode
2982 && (CONSTANT_P (SET_SRC (pat))
2983 /* Match mova_const. */
2984 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2985 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2986 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2988 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2989 && (fp_zero_operand (SET_SRC (pat))
2990 || fp_one_operand (SET_SRC (pat)))
2991 /* ??? If this is a -m4 or -m4-single compilation, in general
2992 we don't know the current setting of fpscr, so disable fldi.
2993 There is an exception if this was a register-register move
2994 before reload - and hence it was ascertained that we have
2995 single precision setting - and in a post-reload optimization
2996 we changed this to do a constant load. In that case
2997 we don't have an r0 clobber, hence we must use fldi. */
2998 && (! TARGET_SH4 || TARGET_FMOVD
2999 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3001 && GET_CODE (SET_DEST (pat)) == REG
3002 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3003 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3004 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3014 return (GET_CODE (insn) == INSN
3015 && GET_CODE (PATTERN (insn)) == SET
3016 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3017 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3018 /* Don't match mova_const. */
3019 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3022 /* Fix up a mova from a switch that went out of range. */
3024 fixup_mova (rtx mova)
3028 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3029 INSN_CODE (mova) = -1;
3034 rtx lab = gen_label_rtx ();
3035 rtx wpat, wpat0, wpat1, wsrc, diff;
3039 worker = NEXT_INSN (worker);
3041 || GET_CODE (worker) == CODE_LABEL
3042 || GET_CODE (worker) == JUMP_INSN)
3044 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3045 wpat = PATTERN (worker);
3046 wpat0 = XVECEXP (wpat, 0, 0);
3047 wpat1 = XVECEXP (wpat, 0, 1);
3048 wsrc = SET_SRC (wpat0);
3049 PATTERN (worker) = (gen_casesi_worker_2
3050 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3051 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3053 INSN_CODE (worker) = -1;
3054 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3055 gen_rtx_LABEL_REF (Pmode, lab));
3056 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3057 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3058 INSN_CODE (mova) = -1;
3062 /* Find the last barrier from insn FROM which is close enough to hold the
3063 constant pool. If we can't find one, then create one near the end of
3067 find_barrier (int num_mova, rtx mova, rtx from)
3076 int leading_mova = num_mova;
3077 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3081 /* For HImode: range is 510, add 4 because pc counts from address of
3082 second instruction after this one, subtract 2 for the jump instruction
3083 that we may need to emit before the table, subtract 2 for the instruction
3084 that fills the jump delay slot (in very rare cases, reorg will take an
3085 instruction from after the constant pool or will leave the delay slot
3086 empty). This gives 510.
3087 For SImode: range is 1020, add 4 because pc counts from address of
3088 second instruction after this one, subtract 2 in case pc is 2 byte
3089 aligned, subtract 2 for the jump instruction that we may need to emit
3090 before the table, subtract 2 for the instruction that fills the jump
3091 delay slot. This gives 1018. */
3093 /* The branch will always be shortened now that the reference address for
3094 forward branches is the successor address, thus we need no longer make
3095 adjustments to the [sh]i_limit for -O0. */
3100 while (from && count_si < si_limit && count_hi < hi_limit)
3102 int inc = get_attr_length (from);
3105 if (GET_CODE (from) == CODE_LABEL)
3108 new_align = 1 << label_to_alignment (from);
3109 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3110 new_align = 1 << barrier_align (from);
3116 if (GET_CODE (from) == BARRIER)
3119 found_barrier = from;
3121 /* If we are at the end of the function, or in front of an alignment
3122 instruction, we need not insert an extra alignment. We prefer
3123 this kind of barrier. */
3124 if (barrier_align (from) > 2)
3125 good_barrier = from;
3128 if (broken_move (from))
3131 enum machine_mode mode;
3133 pat = PATTERN (from);
3134 if (GET_CODE (pat) == PARALLEL)
3135 pat = XVECEXP (pat, 0, 0);
3136 src = SET_SRC (pat);
3137 dst = SET_DEST (pat);
3138 mode = GET_MODE (dst);
3140 /* We must explicitly check the mode, because sometimes the
3141 front end will generate code to load unsigned constants into
3142 HImode targets without properly sign extending them. */
3144 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3147 /* We put the short constants before the long constants, so
3148 we must count the length of short constants in the range
3149 for the long constants. */
3150 /* ??? This isn't optimal, but is easy to do. */
3155 /* We dump DF/DI constants before SF/SI ones, because
3156 the limit is the same, but the alignment requirements
3157 are higher. We may waste up to 4 additional bytes
3158 for alignment, and the DF/DI constant may have
3159 another SF/SI constant placed before it. */
3160 if (TARGET_SHCOMPACT
3162 && (mode == DFmode || mode == DImode))
3167 while (si_align > 2 && found_si + si_align - 2 > count_si)
3169 if (found_si > count_si)
3170 count_si = found_si;
3171 found_si += GET_MODE_SIZE (mode);
3173 si_limit -= GET_MODE_SIZE (mode);
3176 /* See the code in machine_dependent_reorg, which has a similar if
3177 statement that generates a new mova insn in many cases. */
3178 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3188 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3190 if (found_si > count_si)
3191 count_si = found_si;
3193 else if (GET_CODE (from) == JUMP_INSN
3194 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3195 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3199 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3201 /* We have just passed the barrier in front of the
3202 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3203 the ADDR_DIFF_VEC is accessed as data, just like our pool
3204 constants, this is a good opportunity to accommodate what
3205 we have gathered so far.
3206 If we waited any longer, we could end up at a barrier in
3207 front of code, which gives worse cache usage for separated
3208 instruction / data caches. */
3209 good_barrier = found_barrier;
3214 rtx body = PATTERN (from);
3215 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3218 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3219 else if (GET_CODE (from) == JUMP_INSN
3221 && ! TARGET_SMALLCODE)
3227 if (new_align > si_align)
3229 si_limit -= (count_si - 1) & (new_align - si_align);
3230 si_align = new_align;
3232 count_si = (count_si + new_align - 1) & -new_align;
3237 if (new_align > hi_align)
3239 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3240 hi_align = new_align;
3242 count_hi = (count_hi + new_align - 1) & -new_align;
3244 from = NEXT_INSN (from);
3251 /* Try as we might, the leading mova is out of range. Change
3252 it into a load (which will become a pcload) and retry. */
3254 return find_barrier (0, 0, mova);
3258 /* Insert the constant pool table before the mova instruction,
3259 to prevent the mova label reference from going out of range. */
3261 good_barrier = found_barrier = barrier_before_mova;
3267 if (good_barrier && next_real_insn (found_barrier))
3268 found_barrier = good_barrier;
3272 /* We didn't find a barrier in time to dump our stuff,
3273 so we'll make one. */
3274 rtx label = gen_label_rtx ();
3276 /* If we exceeded the range, then we must back up over the last
3277 instruction we looked at. Otherwise, we just need to undo the
3278 NEXT_INSN at the end of the loop. */
3279 if (count_hi > hi_limit || count_si > si_limit)
3280 from = PREV_INSN (PREV_INSN (from));
3282 from = PREV_INSN (from);
3284 /* Walk back to be just before any jump or label.
3285 Putting it before a label reduces the number of times the branch
3286 around the constant pool table will be hit. Putting it before
3287 a jump makes it more likely that the bra delay slot will be
3289 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3290 || GET_CODE (from) == CODE_LABEL)
3291 from = PREV_INSN (from);
3293 from = emit_jump_insn_after (gen_jump (label), from);
3294 JUMP_LABEL (from) = label;
3295 LABEL_NUSES (label) = 1;
3296 found_barrier = emit_barrier_after (from);
3297 emit_label_after (label, found_barrier);
3300 return found_barrier;
3303 /* If the instruction INSN is implemented by a special function, and we can
3304 positively find the register that is used to call the sfunc, and this
3305 register is not used anywhere else in this instruction - except as the
3306 destination of a set, return this register; else, return 0. */
3308 sfunc_uses_reg (rtx insn)
3311 rtx pattern, part, reg_part, reg;
3313 if (GET_CODE (insn) != INSN)
3315 pattern = PATTERN (insn);
3316 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3319 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3321 part = XVECEXP (pattern, 0, i);
3322 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3327 reg = XEXP (reg_part, 0);
3328 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3330 part = XVECEXP (pattern, 0, i);
3331 if (part == reg_part || GET_CODE (part) == CLOBBER)
3333 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3334 && GET_CODE (SET_DEST (part)) == REG)
3335 ? SET_SRC (part) : part)))
3341 /* See if the only way in which INSN uses REG is by calling it, or by
3342 setting it while calling it. Set *SET to a SET rtx if the register
3346 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3352 reg2 = sfunc_uses_reg (insn);
3353 if (reg2 && REGNO (reg2) == REGNO (reg))
3355 pattern = single_set (insn);
3357 && GET_CODE (SET_DEST (pattern)) == REG
3358 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3362 if (GET_CODE (insn) != CALL_INSN)
3364 /* We don't use rtx_equal_p because we don't care if the mode is
3366 pattern = single_set (insn);
3368 && GET_CODE (SET_DEST (pattern)) == REG
3369 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3375 par = PATTERN (insn);
3376 if (GET_CODE (par) == PARALLEL)
3377 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3379 part = XVECEXP (par, 0, i);
3380 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3383 return reg_mentioned_p (reg, SET_SRC (pattern));
3389 pattern = PATTERN (insn);
3391 if (GET_CODE (pattern) == PARALLEL)
3395 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3396 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3398 pattern = XVECEXP (pattern, 0, 0);
3401 if (GET_CODE (pattern) == SET)
3403 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3405 /* We don't use rtx_equal_p, because we don't care if the
3406 mode is different. */
3407 if (GET_CODE (SET_DEST (pattern)) != REG
3408 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3414 pattern = SET_SRC (pattern);
3417 if (GET_CODE (pattern) != CALL
3418 || GET_CODE (XEXP (pattern, 0)) != MEM
3419 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3425 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3426 general registers. Bits 0..15 mean that the respective registers
3427 are used as inputs in the instruction. Bits 16..31 mean that the
3428 registers 0..15, respectively, are used as outputs, or are clobbered.
3429 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3431 regs_used (rtx x, int is_dest)
3439 code = GET_CODE (x);
3444 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3445 << (REGNO (x) + is_dest));
3449 rtx y = SUBREG_REG (x);
3451 if (GET_CODE (y) != REG)
3454 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3456 subreg_regno_offset (REGNO (y),
3459 GET_MODE (x)) + is_dest));
3463 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3465 /* If there was a return value, it must have been indicated with USE. */
3480 fmt = GET_RTX_FORMAT (code);
3482 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3487 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3488 used |= regs_used (XVECEXP (x, i, j), is_dest);
3490 else if (fmt[i] == 'e')
3491 used |= regs_used (XEXP (x, i), is_dest);
3496 /* Create an instruction that prevents redirection of a conditional branch
3497 to the destination of the JUMP with address ADDR.
3498 If the branch needs to be implemented as an indirect jump, try to find
3499 a scratch register for it.
3500 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3501 If any preceding insn that doesn't fit into a delay slot is good enough,
3502 pass 1. Pass 2 if a definite blocking insn is needed.
3503 -1 is used internally to avoid deep recursion.
3504 If a blocking instruction is made or recognized, return it. */
3507 gen_block_redirect (rtx jump, int addr, int need_block)
3510 rtx prev = prev_nonnote_insn (jump);
3513 /* First, check if we already have an instruction that satisfies our need. */
3514 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3516 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3518 if (GET_CODE (PATTERN (prev)) == USE
3519 || GET_CODE (PATTERN (prev)) == CLOBBER
3520 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3522 else if ((need_block &= ~1) < 0)
3524 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3527 if (GET_CODE (PATTERN (jump)) == RETURN)
3531 /* Reorg even does nasty things with return insns that cause branches
3532 to go out of range - see find_end_label and callers. */
3533 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3535 /* We can't use JUMP_LABEL here because it might be undefined
3536 when not optimizing. */
3537 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3538 /* If the branch is out of range, try to find a scratch register for it. */
3540 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3544 /* Don't look for the stack pointer as a scratch register,
3545 it would cause trouble if an interrupt occurred. */
3546 unsigned try = 0x7fff, used;
3547 int jump_left = flag_expensive_optimizations + 1;
3549 /* It is likely that the most recent eligible instruction is wanted for
3550 the delay slot. Therefore, find out which registers it uses, and
3551 try to avoid using them. */
3553 for (scan = jump; (scan = PREV_INSN (scan)); )
3557 if (INSN_DELETED_P (scan))
3559 code = GET_CODE (scan);
3560 if (code == CODE_LABEL || code == JUMP_INSN)
3563 && GET_CODE (PATTERN (scan)) != USE
3564 && GET_CODE (PATTERN (scan)) != CLOBBER
3565 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3567 try &= ~regs_used (PATTERN (scan), 0);
3571 for (used = dead = 0, scan = JUMP_LABEL (jump);
3572 (scan = NEXT_INSN (scan)); )
3576 if (INSN_DELETED_P (scan))
3578 code = GET_CODE (scan);
3581 used |= regs_used (PATTERN (scan), 0);
3582 if (code == CALL_INSN)
3583 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3584 dead |= (used >> 16) & ~used;
3590 if (code == JUMP_INSN)
3592 if (jump_left-- && simplejump_p (scan))
3593 scan = JUMP_LABEL (scan);
3599 /* Mask out the stack pointer again, in case it was
3600 the only 'free' register we have found. */
3603 /* If the immediate destination is still in range, check for possible
3604 threading with a jump beyond the delay slot insn.
3605 Don't check if we are called recursively; the jump has been or will be
3606 checked in a different invocation then. */
3608 else if (optimize && need_block >= 0)
3610 rtx next = next_active_insn (next_active_insn (dest));
3611 if (next && GET_CODE (next) == JUMP_INSN
3612 && GET_CODE (PATTERN (next)) == SET
3613 && recog_memoized (next) == CODE_FOR_jump_compact)
3615 dest = JUMP_LABEL (next);
3617 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3619 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3625 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3627 /* It would be nice if we could convert the jump into an indirect
3628 jump / far branch right now, and thus exposing all constituent
3629 instructions to further optimization. However, reorg uses
3630 simplejump_p to determine if there is an unconditional jump where
3631 it should try to schedule instructions from the target of the
3632 branch; simplejump_p fails for indirect jumps even if they have
3634 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3635 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3637 /* ??? We would like this to have the scope of the jump, but that
3638 scope will change when a delay slot insn of an inner scope is added.
3639 Hence, after delay slot scheduling, we'll have to expect
3640 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3643 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3644 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3647 else if (need_block)
3648 /* We can't use JUMP_LABEL here because it might be undefined
3649 when not optimizing. */
3650 return emit_insn_before (gen_block_branch_redirect
3651 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3656 #define CONDJUMP_MIN -252
3657 #define CONDJUMP_MAX 262
3660 /* A label (to be placed) in front of the jump
3661 that jumps to our ultimate destination. */
3663 /* Where we are going to insert it if we cannot move the jump any farther,
3664 or the jump itself if we have picked up an existing jump. */
3666 /* The ultimate destination. */
3668 struct far_branch *prev;
3669 /* If the branch has already been created, its address;
3670 else the address of its first prospective user. */
3674 static void gen_far_branch (struct far_branch *);
3675 enum mdep_reorg_phase_e mdep_reorg_phase;
3677 gen_far_branch (struct far_branch *bp)
3679 rtx insn = bp->insert_place;
3681 rtx label = gen_label_rtx ();
3683 emit_label_after (label, insn);
3686 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3687 LABEL_NUSES (bp->far_label)++;
3690 jump = emit_jump_insn_after (gen_return (), insn);
3691 /* Emit a barrier so that reorg knows that any following instructions
3692 are not reachable via a fall-through path.
3693 But don't do this when not optimizing, since we wouldn't suppress the
3694 alignment for the barrier then, and could end up with out-of-range
3695 pc-relative loads. */
3697 emit_barrier_after (jump);
3698 emit_label_after (bp->near_label, insn);
3699 JUMP_LABEL (jump) = bp->far_label;
3700 if (! invert_jump (insn, label, 1))
3702 /* If we are branching around a jump (rather than a return), prevent
3703 reorg from using an insn from the jump target as the delay slot insn -
3704 when reorg did this, it pessimized code (we rather hide the delay slot)
3705 and it could cause branches to go out of range. */
3708 (gen_stuff_delay_slot
3709 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3710 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3712 /* Prevent reorg from undoing our splits. */
3713 gen_block_redirect (jump, bp->address += 2, 2);
3716 /* Fix up ADDR_DIFF_VECs. */
3718 fixup_addr_diff_vecs (rtx first)
3722 for (insn = first; insn; insn = NEXT_INSN (insn))
3724 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3726 if (GET_CODE (insn) != JUMP_INSN
3727 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3729 pat = PATTERN (insn);
3730 vec_lab = XEXP (XEXP (pat, 0), 0);
3732 /* Search the matching casesi_jump_2. */
3733 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3735 if (GET_CODE (prev) != JUMP_INSN)
3737 prevpat = PATTERN (prev);
3738 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3740 x = XVECEXP (prevpat, 0, 1);
3741 if (GET_CODE (x) != USE)
3744 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3748 /* Emit the reference label of the braf where it belongs, right after
3749 the casesi_jump_2 (i.e. braf). */
3750 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3751 emit_label_after (braf_label, prev);
3753 /* Fix up the ADDR_DIFF_VEC to be relative
3754 to the reference address of the braf. */
3755 XEXP (XEXP (pat, 0), 0) = braf_label;
3759 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3760 a barrier. Return the base 2 logarithm of the desired alignment. */
3762 barrier_align (rtx barrier_or_label)
3764 rtx next = next_real_insn (barrier_or_label), pat, prev;
3765 int slot, credit, jump_to_next = 0;
3770 pat = PATTERN (next);
/* A jump table directly after the barrier needs no extra alignment.  */
3772 if (GET_CODE (pat) == ADDR_DIFF_VEC)
/* UNSPECV_ALIGN marks the start of a constant table.  */
3775 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3776 /* This is a barrier in front of a constant table. */
3779 prev = prev_real_insn (barrier_or_label);
3780 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3782 pat = PATTERN (prev);
3783 /* If this is a very small table, we want to keep the alignment after
3784 the table to the minimum for proper code alignment. */
3785 return ((TARGET_SMALLCODE
3786 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3787 <= (unsigned) 1 << (CACHE_LOG - 2)))
3788 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3791 if (TARGET_SMALLCODE)
/* Branch alignment only pays off on SH2 and up, and only when optimizing.  */
3794 if (! TARGET_SH2 || ! optimize)
3795 return align_jumps_log;
3797 /* When fixing up pcloads, a constant table might be inserted just before
3798 the basic block that ends with the barrier. Thus, we can't trust the
3799 instruction lengths before that. */
3800 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3802 /* Check if there is an immediately preceding branch to the insn beyond
3803 the barrier. We must weight the cost of discarding useful information
3804 from the current cache line when executing this branch and there is
3805 an alignment, against that of fetching unneeded insn in front of the
3806 branch target when there is no alignment. */
3808 /* There are two delay_slot cases to consider. One is the simple case
3809 where the preceding branch is to the insn beyond the barrier (simple
3810 delay slot filling), and the other is where the preceding branch has
3811 a delay slot that is a duplicate of the insn after the barrier
3812 (fill_eager_delay_slots) and the branch is to the insn after the insn
3813 after the barrier. */
3815 /* PREV is presumed to be the JUMP_INSN for the barrier under
3816 investigation. Skip to the insn before it. */
3817 prev = prev_real_insn (prev);
/* Walk backwards over preceding insns, spending at most a cache line's
   worth of "credit" on their lengths.  */
3819 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3820 credit >= 0 && prev && GET_CODE (prev) == INSN;
3821 prev = prev_real_insn (prev))
/* USE / CLOBBER insns take no space; skip them.  */
3824 if (GET_CODE (PATTERN (prev)) == USE
3825 || GET_CODE (PATTERN (prev)) == CLOBBER)
/* A SEQUENCE is a branch plus its delay-slot insn; look at the slot.  */
3827 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3829 prev = XVECEXP (PATTERN (prev), 0, 1);
3830 if (INSN_UID (prev) == INSN_UID (next))
3832 /* Delay slot was filled with insn at jump target. */
3839 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3841 credit -= get_attr_length (prev);
3844 && GET_CODE (prev) == JUMP_INSN
3845 && JUMP_LABEL (prev))
3849 || next_real_insn (JUMP_LABEL (prev)) == next
3850 /* If relax_delay_slots() decides NEXT was redundant
3851 with some previous instruction, it will have
3852 redirected PREV's jump to the following insn. */
3853 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3854 /* There is no upper bound on redundant instructions
3855 that might have been skipped, but we must not put an
3856 alignment where none had been before. */
3857 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3859 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3860 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3861 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3863 rtx pat = PATTERN (prev);
3864 if (GET_CODE (pat) == PARALLEL)
3865 pat = XVECEXP (pat, 0, 0);
/* Unconditional jumps (SET_SRC == PC) cost 2 extra credit.  */
3866 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3872 return align_jumps_log;
3875 /* If we are inside a phony loop, almost any kind of label can turn up as the
3876 first one in the loop. Aligning a braf label causes incorrect switch
3877 destination addresses; we can detect braf labels because they are
3878 followed by a BARRIER.
3879 Applying loop alignment to small constant or switch tables is a waste
3880 of space, so we suppress this too. */
3882 sh_loop_align (rtx label)
/* Skip over any code labels immediately following LABEL.  */
3887 next = next_nonnote_insn (next);
3888 while (next && GET_CODE (next) == CODE_LABEL);
/* Suppress alignment for switch tables and constant-table entries.  */
3892 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3893 || recog_memoized (next) == CODE_FOR_consttable_2)
3896 return align_loops_log;
3899 /* Do a final pass over the function, just before delayed branch
/* Machine-dependent reorg pass for SH: associate calls with their target
   symbols for linker relaxation, fix up braf-based jump tables, and turn
   out-of-range constant loads into pc-relative loads with emitted
   constant tables.  */
3905 rtx first, insn, mova = NULL_RTX;
3907 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3908 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3910 first = get_insns ();
3912 /* We must split call insns before introducing `mova's. If we're
3913 optimizing, they'll have already been split. Otherwise, make
3914 sure we don't split them too late. */
3916 split_all_insns_noflow ();
3921 /* If relaxing, generate pseudo-ops to associate function calls with
3922 the symbols they call. It does no harm to not generate these
3923 pseudo-ops. However, when we can generate them, it enables to
3924 linker to potentially relax the jsr to a bsr, and eliminate the
3925 register load and, possibly, the constant pool entry. */
3927 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3930 /* Remove all REG_LABEL notes. We want to use them for our own
3931 purposes. This works because none of the remaining passes
3932 need to look at them.
3934 ??? But it may break in the future. We should use a machine
3935 dependent REG_NOTE, or some other approach entirely. */
3936 for (insn = first; insn; insn = NEXT_INSN (insn))
3942 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3943 remove_note (insn, note);
/* Pass 1: find calls through a register and tag the setting insn and
   every call with a shared REG_LABEL note (used by final_prescan_insn
   to emit .uses pseudo-ops for linker relaxation).  */
3947 for (insn = first; insn; insn = NEXT_INSN (insn))
3949 rtx pattern, reg, link, set, scan, dies, label;
3950 int rescan = 0, foundinsn = 0;
3952 if (GET_CODE (insn) == CALL_INSN)
3954 pattern = PATTERN (insn);
3956 if (GET_CODE (pattern) == PARALLEL)
3957 pattern = XVECEXP (pattern, 0, 0);
3958 if (GET_CODE (pattern) == SET)
3959 pattern = SET_SRC (pattern);
3961 if (GET_CODE (pattern) != CALL
3962 || GET_CODE (XEXP (pattern, 0)) != MEM)
3965 reg = XEXP (XEXP (pattern, 0), 0);
/* Not a CALL_INSN; check for a special-function (sfunc) call.  */
3969 reg = sfunc_uses_reg (insn);
3974 if (GET_CODE (reg) != REG)
3977 /* This is a function call via REG. If the only uses of REG
3978 between the time that it is set and the time that it dies
3979 are in function calls, then we can associate all the
3980 function calls with the setting of REG. */
3982 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3984 if (REG_NOTE_KIND (link) != 0)
3986 set = single_set (XEXP (link, 0));
3987 if (set && rtx_equal_p (reg, SET_DEST (set)))
3989 link = XEXP (link, 0);
3996 /* ??? Sometimes global register allocation will have
3997 deleted the insn pointed to by LOG_LINKS. Try
3998 scanning backward to find where the register is set. */
3999 for (scan = PREV_INSN (insn);
4000 scan && GET_CODE (scan) != CODE_LABEL;
4001 scan = PREV_INSN (scan))
4003 if (! INSN_P (scan))
4006 if (! reg_mentioned_p (reg, scan))
4009 if (noncall_uses_reg (reg, scan, &set))
4023 /* The register is set at LINK. */
4025 /* We can only optimize the function call if the register is
4026 being set to a symbol. In theory, we could sometimes
4027 optimize calls to a constant location, but the assembler
4028 and linker do not support that at present. */
4029 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4030 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4033 /* Scan forward from LINK to the place where REG dies, and
4034 make sure that the only insns which use REG are
4035 themselves function calls. */
4037 /* ??? This doesn't work for call targets that were allocated
4038 by reload, since there may not be a REG_DEAD note for the
4042 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4046 /* Don't try to trace forward past a CODE_LABEL if we haven't
4047 seen INSN yet. Ordinarily, we will only find the setting insn
4048 in LOG_LINKS if it is in the same basic block. However,
4049 cross-jumping can insert code labels in between the load and
4050 the call, and can result in situations where a single call
4051 insn may have two targets depending on where we came from. */
4053 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4056 if (! INSN_P (scan))
4059 /* Don't try to trace forward past a JUMP. To optimize
4060 safely, we would have to check that all the
4061 instructions at the jump destination did not use REG. */
4063 if (GET_CODE (scan) == JUMP_INSN)
4066 if (! reg_mentioned_p (reg, scan))
4069 if (noncall_uses_reg (reg, scan, &scanset))
4076 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4078 /* There is a function call to this register other
4079 than the one we are checking. If we optimize
4080 this call, we need to rescan again below. */
4084 /* ??? We shouldn't have to worry about SCANSET here.
4085 We should just be able to check for a REG_DEAD note
4086 on a function call. However, the REG_DEAD notes are
4087 apparently not dependable around libcalls; c-torture
4088 execute/920501-2 is a test case. If SCANSET is set,
4089 then this insn sets the register, so it must have
4090 died earlier. Unfortunately, this will only handle
4091 the cases in which the register is, in fact, set in a
4094 /* ??? We shouldn't have to use FOUNDINSN here.
4095 However, the LOG_LINKS fields are apparently not
4096 entirely reliable around libcalls;
4097 newlib/libm/math/e_pow.c is a test case. Sometimes
4098 an insn will appear in LOG_LINKS even though it is
4099 not the most recent insn which sets the register. */
4103 || find_reg_note (scan, REG_DEAD, reg)))
4112 /* Either there was a branch, or some insn used REG
4113 other than as a function call address. */
4117 /* Create a code label, and put it in a REG_LABEL note on
4118 the insn which sets the register, and on each call insn
4119 which uses the register. In final_prescan_insn we look
4120 for the REG_LABEL notes, and output the appropriate label
4123 label = gen_label_rtx ();
4124 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4126 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
/* RESCAN: other calls through REG were seen; tag them too.  */
4135 scan = NEXT_INSN (scan);
4137 && ((GET_CODE (scan) == CALL_INSN
4138 && reg_mentioned_p (reg, scan))
4139 || ((reg2 = sfunc_uses_reg (scan))
4140 && REGNO (reg2) == REGNO (reg))))
4142 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4144 while (scan != dies);
4150 fixup_addr_diff_vecs (first);
4154 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4155 shorten_branches (first);
4157 /* Scan the function looking for move instructions which have to be
4158 changed to pc-relative loads and insert the literal tables. */
4160 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4161 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4165 /* ??? basic block reordering can move a switch table dispatch
4166 below the switch table. Check if that has happened.
4167 We only have the addresses available when optimizing; but then,
4168 this check shouldn't be needed when not optimizing. */
4169 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4171 && (INSN_ADDRESSES (INSN_UID (insn))
4172 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4174 /* Change the mova into a load.
4175 broken_move will then return true for it. */
4178 else if (! num_mova++)
4181 else if (GET_CODE (insn) == JUMP_INSN
4182 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4190 /* Some code might have been inserted between the mova and
4191 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4192 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4193 total += get_attr_length (scan);
4195 /* range of mova is 1020, add 4 because pc counts from address of
4196 second instruction after this one, subtract 2 in case pc is 2
4197 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4198 cancels out with alignment effects of the mova itself. */
4201 /* Change the mova into a load, and restart scanning
4202 there. broken_move will then return true for mova. */
4207 if (broken_move (insn)
4208 || (GET_CODE (insn) == INSN
4209 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4212 /* Scan ahead looking for a barrier to stick the constant table
4214 rtx barrier = find_barrier (num_mova, mova, insn);
4215 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4216 int need_aligned_label = 0;
4218 if (num_mova && ! mova_p (mova))
4220 /* find_barrier had to change the first mova into a
4221 pcload; thus, we have to start with this new pcload. */
4225 /* Now find all the moves between the points and modify them. */
4226 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4228 if (GET_CODE (scan) == CODE_LABEL)
4230 if (GET_CODE (scan) == INSN
4231 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4232 need_aligned_label = 1;
4233 if (broken_move (scan))
4235 rtx *patp = &PATTERN (scan), pat = *patp;
4239 enum machine_mode mode;
4241 if (GET_CODE (pat) == PARALLEL)
4242 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4243 src = SET_SRC (pat);
4244 dst = SET_DEST (pat);
4245 mode = GET_MODE (dst);
/* SImode constants that fit hi_const can be loaded as HImode.  */
4247 if (mode == SImode && hi_const (src)
4248 && REGNO (dst) != FPUL_REG)
4253 while (GET_CODE (dst) == SUBREG)
4255 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4256 GET_MODE (SUBREG_REG (dst)),
4259 dst = SUBREG_REG (dst);
4261 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4263 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4265 /* This must be an insn that clobbers r0. */
4266 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4267 XVECLEN (PATTERN (scan), 0)
4269 rtx clobber = *clobberp;
4271 if (GET_CODE (clobber) != CLOBBER
4272 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4276 && reg_set_between_p (r0_rtx, last_float_move, scan))
4280 && GET_MODE_SIZE (mode) != 4
4281 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4283 lab = add_constant (src, mode, last_float);
4285 emit_insn_before (gen_mova (lab), scan);
4288 /* There will be a REG_UNUSED note for r0 on
4289 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4290 lest reorg:mark_target_live_regs will not
4291 consider r0 to be used, and we end up with delay
4292 slot insn in front of SCAN that clobbers r0. */
4294 = find_regno_note (last_float_move, REG_UNUSED, 0);
4296 /* If we are not optimizing, then there may not be
4299 PUT_MODE (note, REG_INC);
4301 *last_float_addr = r0_inc_rtx;
4303 last_float_move = scan;
4305 newsrc = gen_rtx_MEM (mode,
4306 (((TARGET_SH4 && ! TARGET_FMOVD)
4307 || REGNO (dst) == FPUL_REG)
4310 last_float_addr = &XEXP (newsrc, 0);
4312 /* Remove the clobber of r0. */
4313 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4314 gen_rtx_SCRATCH (Pmode));
4315 RTX_UNCHANGING_P (newsrc) = 1;
4317 /* This is a mova needing a label. Create it. */
4318 else if (GET_CODE (src) == UNSPEC
4319 && XINT (src, 1) == UNSPEC_MOVA
4320 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4322 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4323 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4324 newsrc = gen_rtx_UNSPEC (SImode,
4325 gen_rtvec (1, newsrc),
/* Default: load from a label in the constant table.  */
4330 lab = add_constant (src, mode, 0);
4331 newsrc = gen_rtx_MEM (mode,
4332 gen_rtx_LABEL_REF (VOIDmode, lab));
4333 RTX_UNCHANGING_P (newsrc) = 1;
/* Replace the pattern and force re-recognition of the insn.  */
4335 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4336 INSN_CODE (scan) = -1;
4339 dump_table (need_aligned_label ? insn : 0, barrier);
4344 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4345 INSN_ADDRESSES_FREE ();
4346 split_branches (first);
4348 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4349 also has an effect on the register that holds the address of the sfunc.
4350 Insert an extra dummy insn in front of each sfunc that pretends to
4351 use this register. */
4352 if (flag_delayed_branch)
4354 for (insn = first; insn; insn = NEXT_INSN (insn))
4356 rtx reg = sfunc_uses_reg (insn);
4360 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4364 /* fpscr is not actually a user variable, but we pretend it is for the
4365 sake of the previous optimization passes, since we want it handled like
4366 one. However, we don't have any debugging information for it, so turn
4367 it into a non-user variable now. */
4369 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4371 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the INSN_UID of the real branch destination reached via LABEL,
   skipping past any newly created (uid >= MAX_UID) redirection insns.  */
4375 get_dest_uid (rtx label, int max_uid)
4377 rtx dest = next_real_insn (label);
4380 /* This can happen for an undefined label. */
4382 dest_uid = INSN_UID (dest);
4383 /* If this is a newly created branch redirection blocking instruction,
4384 we cannot index the branch_uid or insn_addresses arrays with its
4385 uid. But then, we won't need to, because the actual destination is
4386 the following branch. */
4387 while (dest_uid >= max_uid)
4389 dest = NEXT_INSN (dest);
4390 dest_uid = INSN_UID (dest);
/* A bare RETURN destination is special-cased by the caller.  */
4392 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4397 /* Split condbranches that are out of range. Also add clobbers for
4398 scratch registers that are needed in far jumps.
4399 We do this before delay slot scheduling, so that it can take our
4400 newly created instructions into account. It also allows us to
4401 find branches with common targets more easily. */
4404 split_branches (rtx first)
4407 struct far_branch **uid_branch, *far_branch_list = 0;
4408 int max_uid = get_max_uid ();
4410 /* Find out which branches are out of range. */
4411 shorten_branches (first);
/* uid_branch maps a destination uid to its far_branch record so that
   branches with a common target share one near label.  */
4413 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4414 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4416 for (insn = first; insn; insn = NEXT_INSN (insn))
4417 if (! INSN_P (insn))
4419 else if (INSN_DELETED_P (insn))
4421 /* Shorten_branches would split this instruction again,
4422 so transform it into a note. */
4423 PUT_CODE (insn, NOTE);
4424 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4425 NOTE_SOURCE_FILE (insn) = 0;
4427 else if (GET_CODE (insn) == JUMP_INSN
4428 /* Don't mess with ADDR_DIFF_VEC */
4429 && (GET_CODE (PATTERN (insn)) == SET
4430 || GET_CODE (PATTERN (insn)) == RETURN))
4432 enum attr_type type = get_attr_type (insn);
4433 if (type == TYPE_CBRANCH)
/* A conditional branch longer than 4 bytes is out of range and
   must be redirected through a near label.  */
4437 if (get_attr_length (insn) > 4)
4439 rtx src = SET_SRC (PATTERN (insn));
4440 rtx olabel = XEXP (XEXP (src, 1), 0);
4441 int addr = INSN_ADDRESSES (INSN_UID (insn));
4443 int dest_uid = get_dest_uid (olabel, max_uid);
4444 struct far_branch *bp = uid_branch[dest_uid];
4446 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4447 the label if the LABEL_NUSES count drops to zero. There is
4448 always a jump_optimize pass that sets these values, but it
4449 proceeds to delete unreferenced code, and then if not
4450 optimizing, to un-delete the deleted instructions, thus
4451 leaving labels with too low uses counts. */
4454 JUMP_LABEL (insn) = olabel;
4455 LABEL_NUSES (olabel)++;
/* First branch to this destination: allocate its record.  */
4459 bp = (struct far_branch *) alloca (sizeof *bp);
4460 uid_branch[dest_uid] = bp;
4461 bp->prev = far_branch_list;
4462 far_branch_list = bp;
4464 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4465 LABEL_NUSES (bp->far_label)++;
4469 label = bp->near_label;
4470 if (! label && bp->address - addr >= CONDJUMP_MIN)
4472 rtx block = bp->insert_place;
4474 if (GET_CODE (PATTERN (block)) == RETURN)
4475 block = PREV_INSN (block);
4477 block = gen_block_redirect (block,
4479 label = emit_label_after (gen_label_rtx (),
4481 bp->near_label = label;
4483 else if (label && ! NEXT_INSN (label))
4485 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4486 bp->insert_place = insn;
4488 gen_far_branch (bp);
4492 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4494 bp->near_label = label = gen_label_rtx ();
4495 bp->insert_place = insn;
4498 if (! redirect_jump (insn, label, 1))
4503 /* get_attr_length (insn) == 2 */
4504 /* Check if we have a pattern where reorg wants to redirect
4505 the branch to a label from an unconditional branch that
4507 /* We can't use JUMP_LABEL here because it might be undefined
4508 when not optimizing. */
4509 /* A syntax error might cause beyond to be NULL_RTX. */
4511 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4515 && (GET_CODE (beyond) == JUMP_INSN
4516 || ((beyond = next_active_insn (beyond))
4517 && GET_CODE (beyond) == JUMP_INSN))
4518 && GET_CODE (PATTERN (beyond)) == SET
4519 && recog_memoized (beyond) == CODE_FOR_jump_compact
4521 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4522 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4524 gen_block_redirect (beyond,
4525 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4528 next = next_active_insn (insn);
4530 if ((GET_CODE (next) == JUMP_INSN
4531 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4532 && GET_CODE (PATTERN (next)) == SET
4533 && recog_memoized (next) == CODE_FOR_jump_compact
4535 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4536 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4538 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4540 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4542 int addr = INSN_ADDRESSES (INSN_UID (insn));
4545 struct far_branch *bp;
4547 if (type == TYPE_JUMP)
4549 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4550 dest_uid = get_dest_uid (far_label, max_uid);
4553 /* Parse errors can lead to labels outside
4555 if (! NEXT_INSN (far_label))
4560 JUMP_LABEL (insn) = far_label;
4561 LABEL_NUSES (far_label)++;
4563 redirect_jump (insn, NULL_RTX, 1);
4567 bp = uid_branch[dest_uid];
4570 bp = (struct far_branch *) alloca (sizeof *bp);
4571 uid_branch[dest_uid] = bp;
4572 bp->prev = far_branch_list;
4573 far_branch_list = bp;
4575 bp->far_label = far_label;
4577 LABEL_NUSES (far_label)++;
4579 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4580 if (addr - bp->address <= CONDJUMP_MAX)
4581 emit_label_after (bp->near_label, PREV_INSN (insn));
4584 gen_far_branch (bp);
4590 bp->insert_place = insn;
4592 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4594 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4597 /* Generate all pending far branches,
4598 and free our references to the far labels. */
4599 while (far_branch_list)
4601 if (far_branch_list->near_label
4602 && ! NEXT_INSN (far_branch_list->near_label))
4603 gen_far_branch (far_branch_list);
/* Drop our reference; delete the label if it became unused.  */
4605 && far_branch_list->far_label
4606 && ! --LABEL_NUSES (far_branch_list->far_label))
4607 delete_insn (far_branch_list->far_label);
4608 far_branch_list = far_branch_list->prev;
4611 /* Instruction length information is no longer valid due to the new
4612 instructions that have been generated. */
4613 init_insn_lengths ();
4616 /* Dump out instruction addresses, which is useful for debugging the
4617 constant pool table stuff.
4619 If relaxing, output the label and pseudo-ops used to link together
4620 calls and the instruction which set the registers. */
4622 /* ??? The addresses printed by this routine for insns are nonsense for
4623 insns which are inside of a sequence where none of the inner insns have
4624 variable length. This is because the second pass of shorten_branches
4625 does not bother to update them. */
4628 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4629 int noperands ATTRIBUTE_UNUSED)
4631 if (TARGET_DUMPISIZE)
4632 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* REG_LABEL notes were planted by sh_reorg to pair register-setting
   insns with the calls that use them.  */
4638 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4643 pattern = PATTERN (insn);
4644 if (GET_CODE (pattern) == PARALLEL)
4645 pattern = XVECEXP (pattern, 0, 0);
/* On a call (or sfunc), emit a .uses pseudo-op referencing the label;
   on the setting insn, emit the label itself.  */
4646 if (GET_CODE (pattern) == CALL
4647 || (GET_CODE (pattern) == SET
4648 && (GET_CODE (SET_SRC (pattern)) == CALL
4649 || get_attr_type (insn) == TYPE_SFUNC)))
4650 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4651 CODE_LABEL_NUMBER (XEXP (note, 0)));
4652 else if (GET_CODE (pattern) == SET)
4653 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4654 CODE_LABEL_NUMBER (XEXP (note, 0)));
4661 /* Dump out any constants accumulated in the final pass. These will
4665 output_jump_label_table (void)
/* Emit each pending pool entry as an aligned .long with its label.  */
4671 fprintf (asm_out_file, "\t.align 2\n");
4672 for (i = 0; i < pool_size; i++)
4674 pool_node *p = &pool_vector[i];
4676 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4677 CODE_LABEL_NUMBER (p->label));
4678 output_asm_insn (".long %O0", &p->value);
4686 /* A full frame looks like:
4690 [ if current_function_anonymous_args
4703 local-0 <- fp points here. */
4705 /* Number of bytes pushed for anonymous args, used to pass information
4706 between expand_prologue and expand_epilogue. */
4708 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4709 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4710 for an epilogue and a negative value means that it's for a sibcall
4711 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4712 all the registers that are about to be restored, and hence dead. */
4715 output_stack_adjust (int size, rtx reg, int epilogue_p,
4716 HARD_REG_SET *live_regs_mask)
4718 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4721 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4726 if (CONST_OK_FOR_ADD (size))
4727 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4728 /* Try to do it with two partial adjustments; however, we must make
4729 sure that the stack is properly aligned at all times, in case
4730 an interrupt occurs between the two partial adjustments. */
4731 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4732 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4734 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4735 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4741 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4744 /* If TEMP is invalid, we could temporarily save a general
4745 register to MACL. However, there is currently no need
4746 to handle this case, so just abort when we see it. */
4748 || current_function_interrupt
4749 || ! call_used_regs[temp] || fixed_regs[temp])
4751 if (temp < 0 && ! current_function_interrupt
4752 && (TARGET_SHMEDIA || epilogue_p >= 0))
4755 COPY_HARD_REG_SET (temps, call_used_reg_set);
4756 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4760 if (current_function_return_rtx)
4762 enum machine_mode mode;
4763 mode = GET_MODE (current_function_return_rtx);
4764 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4765 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4767 for (i = 0; i < nreg; i++)
4768 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4769 if (current_function_calls_eh_return)
4771 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4772 for (i = 0; i <= 3; i++)
4773 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4776 if (TARGET_SHMEDIA && epilogue_p < 0)
4777 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4778 CLEAR_HARD_REG_BIT (temps, i);
4779 if (epilogue_p <= 0)
4781 for (i = FIRST_PARM_REG;
4782 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4783 CLEAR_HARD_REG_BIT (temps, i);
4784 if (cfun->static_chain_decl != NULL)
4785 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4787 temp = scavenge_reg (&temps);
4789 if (temp < 0 && live_regs_mask)
4790 temp = scavenge_reg (live_regs_mask);
4793 /* If we reached here, the most likely case is the (sibcall)
4794 epilogue for non SHmedia. Put a special push/pop sequence
4795 for such case as the last resort. This looks lengthy but
4796 would not be problem because it seems to be very rare. */
4797 if (! TARGET_SHMEDIA && epilogue_p)
4799 rtx adj_reg, tmp_reg, mem;
4801 /* ??? There is still the slight possibility that r4 or r5
4802 have been reserved as fixed registers or assigned as
4803 global registers, and they change during an interrupt.
4804 There are possible ways to handle this:
4805 - If we are adjusting the frame pointer (r14), we can do
4806 with a single temp register and an ordinary push / pop
4808 - Grab any call-used or call-saved registers (i.e. not
4809 fixed or globals) for the temps we need. We might
4810 also grab r14 if we are adjusting the stack pointer.
4811 If we can't find enough available registers, issue
4812 a diagnostic and abort - the user must have reserved
4813 way too many registers.
4814 But since all this is rather unlikely to happen and
4815 would require extra testing, we just abort if r4 / r5
4816 are not available. */
4817 if (fixed_regs[4] || fixed_regs[5]
4818 || global_regs[4] || global_regs[5])
4821 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4822 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4823 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4824 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4825 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4826 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4827 emit_move_insn (mem, tmp_reg);
4828 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4829 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4830 emit_move_insn (mem, tmp_reg);
4831 emit_move_insn (reg, adj_reg);
4832 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4833 emit_move_insn (adj_reg, mem);
4834 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4835 emit_move_insn (tmp_reg, mem);
4841 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4843 /* If SIZE is negative, subtract the positive value.
4844 This sometimes allows a constant pool entry to be shared
4845 between prologue and epilogue code. */
4848 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4849 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4853 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4854 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4858 = (gen_rtx_EXPR_LIST
4859 (REG_FRAME_RELATED_EXPR,
4860 gen_rtx_SET (VOIDmode, reg,
4861 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4871 RTX_FRAME_RELATED_P (x) = 1;
4875 /* Output RTL to push register RN onto the stack. */
/* Select the push pattern matching the register class/mode of RN.  */
4882 x = gen_push_fpul ();
4883 else if (rn == FPSCR_REG)
4884 x = gen_push_fpscr ();
4885 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4886 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP regs cannot start a DFmode pair.  */
4888 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4890 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4892 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4893 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4895 x = gen_push (gen_rtx_REG (SImode, rn));
/* Note the stack pointer auto-decrement for reorg's benefit.  */
4899 = gen_rtx_EXPR_LIST (REG_INC,
4900 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4904 /* Output RTL to pop register RN from the stack. */
/* Mirror of push (): select the pop pattern for RN's register class.  */
4911 x = gen_pop_fpul ();
4912 else if (rn == FPSCR_REG)
4913 x = gen_pop_fpscr ();
4914 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4915 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP regs cannot start a DFmode pair.  */
4917 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4919 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4921 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4922 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4924 x = gen_pop (gen_rtx_REG (SImode, rn));
/* Note the stack pointer auto-increment for reorg's benefit.  */
4928 = gen_rtx_EXPR_LIST (REG_INC,
4929 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4932 /* Generate code to push the regs specified in the mask. */
4935 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4940 /* Push PR last; this gives better latencies after the prologue, and
4941 candidates for the return delay slot when there are no general
4942 registers pushed. */
4943 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4945 /* If this is an interrupt handler, and the SZ bit varies,
4946 and we have to push any floating point register, we need
4947 to switch to the correct precision first. */
4948 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4949 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4951 HARD_REG_SET unsaved;
4954 COMPL_HARD_REG_SET (unsaved, *mask);
4955 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
/* Push every register in MASK (FPSCR may be deferred — see the
   skip_fpscr condition below; its setup is elided in this extract).  */
4959 && (i != FPSCR_REG || ! skip_fpscr)
4960 && TEST_HARD_REG_BIT (*mask, i))
/* PR was skipped in the loop above; push it last.  */
4963 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4967 /* Calculate how much extra space is needed to save all callee-saved
4969 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4972 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4975 int stack_space = 0;
4976 int interrupt_handler = sh_cfun_interrupt_handler_p ();
/* Sum the sizes of callee-saved target registers not already being
   saved via LIVE_REGS_MASK.  */
4978 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4979 if ((! call_used_regs[reg] || interrupt_handler)
4980 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4981 /* Leave space to save this target register on the stack,
4982 in case target register allocation wants to use it. */
4983 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4987 /* Decide whether we should reserve space for callee-save target registers,
4988 in case target register allocation wants to use them. REGS_SAVED is
4989 the space, in bytes, that is already required for register saves.
4990 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4993 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4994 HARD_REG_SET *live_regs_mask)
/* Only worth it when the extra space does not exceed what is already
   being spent on register saves.  */
4998 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5001 /* Decide how much space to reserve for callee-save target registers
5002 in case target register allocation wants to use them.
5003 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5006 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
/* Space is reserved only if the earlier decision (recorded in
   shmedia_space_reserved_for_target_registers) said so.  */
5008 if (shmedia_space_reserved_for_target_registers)
5009 return shmedia_target_regs_stack_space (live_regs_mask);
5014 /* Work out the registers which need to be saved, both as a mask and a
5015 count of saved words. Return the count.
5017 If doing a pragma interrupt function, then push all regs used by the
5018 function, and if we call another function (we can tell by looking at PR),
5019 make sure that all the regs it clobbers are safe too. */
/* NOTE(review): gapped listing -- the return type, braces, the
   declarations of REG and COUNT, and numerous interior lines are
   elided (the embedded original line numbers skip values).  Comments
   below only describe what the visible lines establish.  */
5022 calc_live_regs (HARD_REG_SET *live_regs_mask)
5026 int interrupt_handler;
5027 int pr_live, has_call;
5029 interrupt_handler = sh_cfun_interrupt_handler_p ();
5031 CLEAR_HARD_REG_SET (*live_regs_mask);
/* FPU mode selection: force double mode (clear FPU_SINGLE_BIT) for
   SH4 FMOVD interrupt handlers that touch FPSCR, or when enough FP
   register pairs are live to make double-mode saves cheaper.  */
5032 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
5033 && regs_ever_live[FPSCR_REG])
5034 target_flags &= ~FPU_SINGLE_BIT;
5035 /* If we can save a lot of saves by switching to double mode, do that. */
5036 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
5037 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5038 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5039 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5042 target_flags &= ~FPU_SINGLE_BIT;
5045 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5046 knows how to use it. That means the pseudo originally allocated for
5047 the initial value can become the PR_MEDIA_REG hard register, as seen for
5048 execute/20010122-1.c:test9. */
5050 /* ??? this function is called from initial_elimination_offset, hence we
5051 can't use the result of sh_media_register_for_return here. */
5052 pr_live = sh_pr_n_sets ();
/* Non-SHmedia path (presumably -- the branch structure is elided):
   PR is live if its incoming value was captured into a pseudo other
   than PR itself, or if PR is recorded as ever-live.  */
5055 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5056 pr_live = (pr_initial
5057 ? (GET_CODE (pr_initial) != REG
5058 || REGNO (pr_initial) != (PR_REG))
5059 : regs_ever_live[PR_REG]);
5060 /* For Shcompact, if not optimizing, we end up with a memory reference
5061 using the return address pointer for __builtin_return_address even
5062 though there is no actual need to put the PR register on the stack. */
5063 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5065 /* Force PR to be live if the prologue has to call the SHmedia
5066 argument decoder or register saver. */
5067 if (TARGET_SHCOMPACT
5068 && ((current_function_args_info.call_cookie
5069 & ~ CALL_COOKIE_RET_TRAMP (1))
5070 || current_function_has_nonlocal_label))
5072 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
/* Main scan: walk every hard register from highest to lowest and
   decide whether it must be saved, accumulating the mask and COUNT
   (in bytes, via the register's natural mode size).  */
5073 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5075 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5077 : (interrupt_handler && ! pragma_trapa)
5078 ? (/* Need to save all the regs ever live. */
5079 (regs_ever_live[reg]
5080 || (call_used_regs[reg]
5081 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5083 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5084 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5085 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5086 && reg != RETURN_ADDRESS_POINTER_REGNUM
5087 && reg != T_REG && reg != GBR_REG
5088 /* Push fpscr only on targets which have FPU */
5089 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5090 : (/* Only push those regs which are used and need to be saved. */
5093 && current_function_args_info.call_cookie
5094 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5095 || (regs_ever_live[reg] && ! call_used_regs[reg])
5096 || (current_function_calls_eh_return
5097 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5098 || reg == (int) EH_RETURN_DATA_REGNO (1)
5099 || reg == (int) EH_RETURN_DATA_REGNO (2)
5100 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5101 || ((reg == MACL_REG || reg == MACH_REG)
5102 && regs_ever_live[reg]
5103 && sh_cfun_attr_renesas_p ())
5106 SET_HARD_REG_BIT (*live_regs_mask, reg);
5107 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
/* With FMOVD on SH4/SH5, FP registers pair up: also reserve the
   partner register of a saved FP reg, and accessing an XD register
   forces double mode.  */
5109 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
5110 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT
5112 if (FP_REGISTER_P (reg))
5114 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5116 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5117 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5120 else if (XD_REGISTER_P (reg))
5122 /* Must switch to double mode to access these registers. */
5123 target_flags &= ~FPU_SINGLE_BIT;
5128 /* If we have a target register optimization pass after prologue / epilogue
5129 threading, we need to assume all target registers will be live even if
5131 if (flag_branch_target_load_optimize2
5132 && TARGET_SAVE_ALL_TARGET_REGS
5133 && shmedia_space_reserved_for_target_registers)
5134 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5135 if ((! call_used_regs[reg] || interrupt_handler)
5136 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5138 SET_HARD_REG_BIT (*live_regs_mask, reg);
5139 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5141 /* If this is an interrupt handler, we don't have any call-clobbered
5142 registers we can conveniently use for target register save/restore.
5143 Make sure we save at least one general purpose register when we need
5144 to save target registers. */
/* NOTE(review): the "®" on the two lines below is mojibake --
   almost certainly "&reg", i.e. "&reg_class_contents[...]".  */
5145 if (interrupt_handler
5146 && hard_regs_intersect_p (live_regs_mask,
5147 ®_class_contents[TARGET_REGS])
5148 && ! hard_regs_intersect_p (live_regs_mask,
5149 ®_class_contents[GENERAL_REGS]))
5151 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5152 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5158 /* Code to generate prologue and epilogue sequences */
5160 /* PUSHED is the number of bytes that are being pushed on the
5161 stack for register saves. Return the frame size, padded
5162 appropriately so that the stack stays properly aligned. */
/* NOTE(review): the opening/closing braces are elided by this gapped
   listing; the body itself is otherwise complete.  */
5163 static HOST_WIDE_INT
5164 rounded_frame_size (int pushed)
5166 HOST_WIDE_INT size = get_frame_size ();
5167 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* Round SIZE + PUSHED up to a multiple of ALIGN ("& -align" relies on
   ALIGN being a power of two), then subtract PUSHED again so only the
   padded local-frame portion is returned.  */
5169 return ((size + pushed + align - 1) & -align) - pushed;
5172 /* Choose a call-clobbered target-branch register that remains
5173 unchanged along the whole function. We set it up as the return
5174 value in the prologue. */
/* NOTE(review): gapped listing -- the return type, declarations of
   REGNO and TR0_USED, the bodies of the early-exit branches and the
   final return value are all elided here.  */
5176 sh_media_register_for_return (void)
/* Non-leaf functions and interrupt handlers evidently bail out early
   (the returned sentinel is not visible in this listing).  */
5181 if (! current_function_is_leaf)
5183 if (lookup_attribute ("interrupt_handler",
5184 DECL_ATTRIBUTES (current_function_decl)))
/* With PIC, a live PIC register makes TR0 unavailable, so the search
   for a call-clobbered, never-live target register starts one later.  */
5187 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5189 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5190 if (call_used_regs[regno] && ! regs_ever_live[regno])
5196 /* The maximum registers we need to save are:
5197 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5198 - 32 floating point registers (for each pair, we save none,
5199 one single precision value, or a double precision value).
5200 - 8 target registers
5201 - add 1 entry for a delimiter. */
5202 #define MAX_SAVED_REGS (62+32+8)
/* NOTE(review): gapped listing -- the members of save_entry_s are
   elided (uses elsewhere in the file reference at least ->reg, ->mode
   and ->offset), as are the typedef name line and the definition of
   MAX_TEMPS.  */
5204 typedef struct save_entry_s
5213 /* There will be a delimiter entry with VOIDmode both at the start and the
5214 end of a filled in schedule. The end delimiter has the offset of the
5215 save with the smallest (i.e. most negative) offset. */
/* A schedule is the ordered list of register saves plus a pool of
   scratch ("temp") register numbers; sh5_schedule_saves terminates
   the temp list with -1.  */
5216 typedef struct save_schedule_s
5218 save_entry entries[MAX_SAVED_REGS + 2];
5219 int temps[MAX_TEMPS+1];
5222 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5223 use reverse order. Returns the last entry written to (not counting
5224 the delimiter). OFFSET_BASE is a number to be added to all offset
/* NOTE(review): gapped listing -- the comment above is truncated, and
   the return type, braces, local declarations (I, TMPX, OFFSET,
   ALIGN), entry increments and several block-closing lines are elided
   between the visible lines below.  */
5228 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5232 save_entry *entry = schedule->entries;
/* Collect call-clobbered, non-fixed general registers usable as
   scratch temporaries, skipping argument registers, the return
   register, the static chain and the EH bookkeeping registers;
   interrupt handlers get no up-front temporaries.  */
5236 if (! current_function_interrupt)
5237 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5238 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5239 && ! FUNCTION_ARG_REGNO_P (i)
5240 && i != FIRST_RET_REG
5241 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5242 && ! (current_function_calls_eh_return
5243 && (i == EH_RETURN_STACKADJ_REGNO
5244 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5245 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5246 schedule->temps[tmpx++] = i;
/* Leading VOIDmode delimiter entry carrying OFFSET_BASE.  */
5248 entry->mode = VOIDmode;
5249 entry->offset = offset_base;
5251 /* We loop twice: first, we save 8-byte aligned registers in the
5252 higher addresses, that are known to be aligned. Then, we
5253 proceed to saving 32-bit registers that don't need 8-byte
5255 If this is an interrupt function, all registers that need saving
5256 need to be saved in full. moreover, we need to postpone saving
5257 target registers till we have saved some general purpose registers
5258 we can then use as scratch registers. */
5259 offset = offset_base;
5260 for (align = 1; align >= 0; align--)
5262 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5263 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5265 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5268 if (current_function_interrupt)
5270 if (TARGET_REGISTER_P (i))
5272 if (GENERAL_REGISTER_P (i))
5275 if (mode == SFmode && (i % 2) == 1
5276 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5277 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5284 /* If we're doing the aligned pass and this is not aligned,
5285 or we're doing the unaligned pass and this is aligned,
5287 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
/* A general register just scheduled for saving in an interrupt
   handler can itself serve as a scratch register afterwards.  */
5291 if (current_function_interrupt
5292 && GENERAL_REGISTER_P (i)
5293 && tmpx < MAX_TEMPS)
5294 schedule->temps[tmpx++] = i;
5296 offset -= GET_MODE_SIZE (mode);
5299 entry->offset = offset;
/* Postponed target-register saves for interrupt handlers, emitted
   once general-purpose scratch registers are available.  */
5302 if (align && current_function_interrupt)
5303 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5304 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5306 offset -= GET_MODE_SIZE (DImode);
5308 entry->mode = DImode;
5309 entry->offset = offset;
/* Trailing delimiter: VOIDmode entry holding the most negative
   offset, and -1 terminating the temp-register list.  */
5314 entry->mode = VOIDmode;
5315 entry->offset = offset;
5316 schedule->temps[tmpx] = -1;
/* Expand RTL for the function prologue: pretend-arg adjustment,
   SHcompact/SHmedia special handling, varargs setup, register saves,
   frame allocation and frame-pointer setup.
   NOTE(review): gapped listing -- the return type, braces, several
   local declarations (D, PRETEND_ARGS, I, RN, R0, D_ROUNDING, ENTRY,
   TMP_PNT, INSN, SET, NOTE_RTX, ...) and many interior lines are
   elided (the embedded original line numbers skip values).  */
5321 sh_expand_prologue (void)
5323 HARD_REG_SET live_regs_mask;
5326 int save_flags = target_flags;
5329 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5331 /* We have pretend args if we had an object sent partially in registers
5332 and partially on the stack, e.g. a large structure. */
5333 pretend_args = current_function_pretend_args_size;
5334 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5335 && (NPARM_REGS(SImode)
5336 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5338 output_stack_adjust (-pretend_args
5339 - current_function_args_info.stack_regs * 8,
5340 stack_pointer_rtx, 0, NULL);
5342 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5343 /* We're going to use the PIC register to load the address of the
5344 incoming-argument decoder and/or of the return trampoline from
5345 the GOT, so make sure the PIC register is preserved and
5347 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5349 if (TARGET_SHCOMPACT
5350 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5354 /* First, make all registers with incoming arguments that will
5355 be pushed onto the stack live, so that register renaming
5356 doesn't overwrite them. */
5357 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5358 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5359 >= NPARM_REGS (SImode) - reg)
5360 for (; reg < NPARM_REGS (SImode); reg++)
5361 emit_insn (gen_shcompact_preserve_incoming_args
5362 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5363 else if (CALL_COOKIE_INT_REG_GET
5364 (current_function_args_info.call_cookie, reg) == 1)
5365 emit_insn (gen_shcompact_preserve_incoming_args
5366 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
/* Pass the call cookie to the SHcompact argument decoder via
   mach/macl/r0 (the source of the MACL move is elided here).  */
5368 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5370 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5371 GEN_INT (current_function_args_info.call_cookie));
5372 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5373 gen_rtx_REG (SImode, R0_REG));
/* SHmedia: snapshot PR into the chosen return-target register.  */
5375 else if (TARGET_SHMEDIA)
5377 int tr = sh_media_register_for_return ();
5381 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5382 gen_rtx_REG (DImode, PR_MEDIA_REG));
5384 /* ??? We should suppress saving pr when we don't need it, but this
5385 is tricky because of builtin_return_address. */
5387 /* If this function only exits with sibcalls, this copy
5388 will be flagged as dead. */
5389 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5395 /* Emit the code for SETUP_VARARGS. */
5396 if (current_function_stdarg)
5398 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5400 /* Push arg regs as if they'd been provided by caller in stack. */
5401 for (i = 0; i < NPARM_REGS(SImode); i++)
5403 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5406 if (i >= (NPARM_REGS(SImode)
5407 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5411 RTX_FRAME_RELATED_P (insn) = 0;
5416 /* If we're supposed to switch stacks at function entry, do so now. */
5418 emit_insn (gen_sp_switch_1 ());
/* Work out which registers must be saved (D = bytes needed).  */
5420 d = calc_live_regs (&live_regs_mask);
5421 /* ??? Maybe we could save some switching if we can move a mode switch
5422 that already happens to be at the function start into the prologue. */
5423 if (target_flags != save_flags && ! current_function_interrupt)
5424 emit_insn (gen_toggle_sz ());
/* SH5 save path: build a save schedule and store each register,
   preferring pre-decrement or r0-relative addressing when legal.  */
5428 int offset_base, offset;
5430 int offset_in_r0 = -1;
5432 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5433 int total_size, save_size;
5434 save_schedule schedule;
5438 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5439 && ! current_function_interrupt)
5440 r0 = gen_rtx_REG (Pmode, R0_REG);
5442 /* D is the actual number of bytes that we need for saving registers,
5443 however, in initial_elimination_offset we have committed to using
5444 an additional TREGS_SPACE amount of bytes - in order to keep both
5445 addresses to arguments supplied by the caller and local variables
5446 valid, we must keep this gap. Place it between the incoming
5447 arguments and the actually saved registers in a bid to optimize
5448 locality of reference. */
5449 total_size = d + tregs_space;
5450 total_size += rounded_frame_size (total_size);
5451 save_size = total_size - rounded_frame_size (d);
5452 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5453 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5454 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5456 /* If adjusting the stack in a single step costs nothing extra, do so.
5457 I.e. either if a single addi is enough, or we need a movi anyway,
5458 and we don't exceed the maximum offset range (the test for the
5459 latter is conservative for simplicity). */
5461 && (CONST_OK_FOR_I10 (-total_size)
5462 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5463 && total_size <= 2044)))
5464 d_rounding = total_size - save_size;
5466 offset_base = d + d_rounding;
5468 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5471 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5472 tmp_pnt = schedule.temps;
5473 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5475 enum machine_mode mode = entry->mode;
5476 int reg = entry->reg;
5477 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5479 offset = entry->offset;
5481 reg_rtx = gen_rtx_REG (mode, reg);
5483 mem_rtx = gen_rtx_MEM (mode,
5484 gen_rtx_PLUS (Pmode,
5488 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5496 if (HAVE_PRE_DECREMENT
5497 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5498 || mem_rtx == NULL_RTX
5499 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5501 pre_dec = gen_rtx_MEM (mode,
5502 gen_rtx_PRE_DEC (Pmode, r0));
5504 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5513 offset += GET_MODE_SIZE (mode);
5517 if (mem_rtx != NULL_RTX)
/* Keep OFFSET_IN_R0 tracking the constant currently held in r0 so
   consecutive saves can reuse/adjust it cheaply.  */
5520 if (offset_in_r0 == -1)
5522 emit_move_insn (r0, GEN_INT (offset));
5523 offset_in_r0 = offset;
5525 else if (offset != offset_in_r0)
5530 GEN_INT (offset - offset_in_r0)));
5531 offset_in_r0 += offset - offset_in_r0;
5534 if (pre_dec != NULL_RTX)
5540 (Pmode, r0, stack_pointer_rtx));
5544 offset -= GET_MODE_SIZE (mode);
5545 offset_in_r0 -= GET_MODE_SIZE (mode);
5550 mem_rtx = gen_rtx_MEM (mode, r0);
5552 mem_rtx = gen_rtx_MEM (mode,
5553 gen_rtx_PLUS (Pmode,
5557 /* We must not use an r0-based address for target-branch
5558 registers or for special registers without pre-dec
5559 memory addresses, since we store their values in r0
5561 if (TARGET_REGISTER_P (reg)
5562 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5563 && mem_rtx != pre_dec))
5567 if (TARGET_REGISTER_P (reg)
5568 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5569 && mem_rtx != pre_dec))
/* Bounce the value through a scratch register from the schedule's
   temp pool (round-robin, list is -1 terminated).  */
5571 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5573 emit_move_insn (tmp_reg, reg_rtx);
5575 if (REGNO (tmp_reg) == R0_REG)
5579 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5583 if (*++tmp_pnt <= 0)
5584 tmp_pnt = schedule.temps;
5591 /* Mark as interesting for dwarf cfi generator */
5592 insn = emit_move_insn (mem_rtx, reg_rtx);
5593 RTX_FRAME_RELATED_P (insn) = 1;
/* SHcompact with r0-relative stores: attach an explicit
   REG_FRAME_RELATED_EXPR note so the CFI is still correct.  */
5595 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5597 rtx reg_rtx = gen_rtx_REG (mode, reg);
5599 rtx mem_rtx = gen_rtx_MEM (mode,
5600 gen_rtx_PLUS (Pmode,
5604 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5605 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5607 REG_NOTES (insn) = note_rtx;
5612 if (entry->offset != d_rounding)
/* Non-SH5 save path: conventional pushes.  */
5616 push_regs (&live_regs_mask, current_function_interrupt);
5618 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5620 rtx insn = get_last_insn ();
5621 rtx last = emit_insn (gen_GOTaddr2picreg ());
5623 /* Mark these insns as possibly dead. Sometimes, flow2 may
5624 delete all uses of the PIC register. In this case, let it
5625 delete the initialization too. */
5628 insn = NEXT_INSN (insn);
5630 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5634 while (insn != last);
5637 if (SHMEDIA_REGS_STACK_ADJUST ())
5639 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5640 function_symbol (TARGET_FPU_ANY
5641 ? "__GCC_push_shmedia_regs"
5642 : "__GCC_push_shmedia_regs_nofpu"));
5643 /* This must NOT go through the PLT, otherwise mach and macl
5644 may be clobbered. */
5645 emit_insn (gen_shmedia_save_restore_regs_compact
5646 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5649 if (target_flags != save_flags && ! current_function_interrupt)
5651 rtx insn = emit_insn (gen_toggle_sz ());
5653 /* If we're lucky, a mode switch in the function body will
5654 overwrite fpscr, turning this insn dead. Tell flow this
5655 insn is ok to delete. */
5656 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5661 target_flags = save_flags;
/* Allocate the local frame and set up the frame pointer.  */
5663 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5664 stack_pointer_rtx, 0, NULL);
5666 if (frame_pointer_needed)
5667 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5669 if (TARGET_SHCOMPACT
5670 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5672 /* This must NOT go through the PLT, otherwise mach and macl
5673 may be clobbered. */
5674 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5675 function_symbol ("__GCC_shcompact_incoming_args"));
5676 emit_insn (gen_shcompact_incoming_args ());
/* Expand RTL for the function epilogue.  SIBCALL_P selects the
   sibcall variant (E below flips the sign/direction passed to
   output_stack_adjust).
   NOTE(review): gapped listing -- the return type, braces, several
   local declarations (D, D_ROUNDING, TOTAL_SIZE, ENTRY, TMP_PNT, I,
   J, R0 in places, ...) and many interior lines are elided (the
   embedded original line numbers skip values).  */
5681 sh_expand_epilogue (bool sibcall_p)
5683 HARD_REG_SET live_regs_mask;
5687 int save_flags = target_flags;
5688 int frame_size, save_size;
5689 int fpscr_deferred = 0;
5690 int e = sibcall_p ? -1 : 1;
5692 d = calc_live_regs (&live_regs_mask);
5695 frame_size = rounded_frame_size (d);
/* SH5: recompute the sizes the prologue used so the deallocation
   mirrors the allocation exactly.  */
5699 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5701 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5702 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5703 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5705 total_size = d + tregs_space;
5706 total_size += rounded_frame_size (total_size);
5707 save_size = total_size - frame_size;
5709 /* If adjusting the stack in a single step costs nothing extra, do so.
5710 I.e. either if a single addi is enough, or we need a movi anyway,
5711 and we don't exceed the maximum offset range (the test for the
5712 latter is conservative for simplicity). */
5714 && ! frame_pointer_needed
5715 && (CONST_OK_FOR_I10 (total_size)
5716 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5717 && total_size <= 2044)))
5718 d_rounding = frame_size;
5720 frame_size -= d_rounding;
/* Deallocate the local frame, via the frame pointer when one exists.  */
5723 if (frame_pointer_needed)
5725 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5727 /* We must avoid moving the stack pointer adjustment past code
5728 which reads from the local frame, else an interrupt could
5729 occur after the SP adjustment and clobber data in the local
5731 emit_insn (gen_blockage ());
5732 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5734 else if (frame_size)
5736 /* We must avoid moving the stack pointer adjustment past code
5737 which reads from the local frame, else an interrupt could
5738 occur after the SP adjustment and clobber data in the local
5740 emit_insn (gen_blockage ());
5741 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5744 if (SHMEDIA_REGS_STACK_ADJUST ())
5746 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5747 function_symbol (TARGET_FPU_ANY
5748 ? "__GCC_pop_shmedia_regs"
5749 : "__GCC_pop_shmedia_regs_nofpu"));
5750 /* This must NOT go through the PLT, otherwise mach and macl
5751 may be clobbered. */
5752 emit_insn (gen_shmedia_save_restore_regs_compact
5753 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5756 /* Pop all the registers. */
5758 if (target_flags != save_flags && ! current_function_interrupt)
5759 emit_insn (gen_toggle_sz ());
/* SH5 restore path: rebuild the save schedule and walk it backwards
   (from the trailing delimiter), reloading each register, preferring
   post-increment or r0-relative addressing when legal.  */
5762 int offset_base, offset;
5763 int offset_in_r0 = -1;
5765 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5766 save_schedule schedule;
5770 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5771 offset_base = -entry[1].offset + d_rounding;
5772 tmp_pnt = schedule.temps;
5773 for (; entry->mode != VOIDmode; entry--)
5775 enum machine_mode mode = entry->mode;
5776 int reg = entry->reg;
5777 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5779 offset = offset_base + entry->offset;
5780 reg_rtx = gen_rtx_REG (mode, reg);
5782 mem_rtx = gen_rtx_MEM (mode,
5783 gen_rtx_PLUS (Pmode,
5787 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5793 if (HAVE_POST_INCREMENT
5794 && (offset == offset_in_r0
5795 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5796 && mem_rtx == NULL_RTX)
5797 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5799 post_inc = gen_rtx_MEM (mode,
5800 gen_rtx_POST_INC (Pmode, r0));
5802 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5805 post_inc = NULL_RTX;
5814 if (mem_rtx != NULL_RTX)
/* Keep OFFSET_IN_R0 tracking the constant held in r0, as in the
   prologue's save loop.  */
5817 if (offset_in_r0 == -1)
5819 emit_move_insn (r0, GEN_INT (offset));
5820 offset_in_r0 = offset;
5822 else if (offset != offset_in_r0)
5827 GEN_INT (offset - offset_in_r0)));
5828 offset_in_r0 += offset - offset_in_r0;
5831 if (post_inc != NULL_RTX)
5837 (Pmode, r0, stack_pointer_rtx));
5843 offset_in_r0 += GET_MODE_SIZE (mode);
5846 mem_rtx = gen_rtx_MEM (mode, r0);
5848 mem_rtx = gen_rtx_MEM (mode,
5849 gen_rtx_PLUS (Pmode,
5853 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5854 && mem_rtx != post_inc)
/* PR and special registers loaded from a non-post-inc address get
   bounced through r0 first.  */
5858 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5859 && mem_rtx != post_inc)
5861 insn = emit_move_insn (r0, mem_rtx);
5864 else if (TARGET_REGISTER_P (reg))
5866 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5868 /* Give the scheduler a bit of freedom by using up to
5869 MAX_TEMPS registers in a round-robin fashion. */
5870 insn = emit_move_insn (tmp_reg, mem_rtx);
5873 tmp_pnt = schedule.temps;
5876 insn = emit_move_insn (reg_rtx, mem_rtx);
5877 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5878 /* This is dead, unless we return with a sibcall. */
5879 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5884 if (entry->offset + offset_base != d + d_rounding)
5887 else /* ! TARGET_SH5 */
/* Non-SH5 restore path: pop registers in reverse order, deferring
   FPSCR until its dependent FP registers are restored.
   NOTE(review): the "®" below is mojibake -- almost certainly
   "&reg", i.e. "&reg_class_contents[DF_REGS]".  */
5890 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5892 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5894 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5896 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5897 && hard_regs_intersect_p (&live_regs_mask,
5898 ®_class_contents[DF_REGS]))
5900 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5902 if (j == FIRST_FP_REG && fpscr_deferred)
5907 if (target_flags != save_flags && ! current_function_interrupt)
5908 emit_insn (gen_toggle_sz ());
5909 target_flags = save_flags;
/* Release the register-save area and any pretend-arg space.  */
5911 output_stack_adjust (current_function_pretend_args_size
5912 + save_size + d_rounding
5913 + current_function_args_info.stack_regs * 8,
5914 stack_pointer_rtx, e, NULL);
5916 if (current_function_calls_eh_return)
5917 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5918 EH_RETURN_STACKADJ_RTX));
5920 /* Switch back to the normal stack if necessary. */
5922 emit_insn (gen_sp_switch_2 ());
5924 /* Tell flow the insn that pops PR isn't dead. */
5925 /* PR_REG will never be live in SHmedia mode, and we don't need to
5926 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5927 by the return pattern. */
5928 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5929 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
/* Cached answer for sh_need_epilogue: 0 = not yet computed,
   1 = epilogue needed, -1 = not needed.  Reset to 0 per function by
   sh_output_function_epilogue.  */
5932 static int sh_need_epilogue_known = 0;
/* NOTE(review): gapped listing -- the return type, braces, the
   declaration of EPILOGUE and the start_sequence/end_sequence
   bracketing around the trial expansion are elided.  */
5935 sh_need_epilogue (void)
5937 if (! sh_need_epilogue_known)
/* Expand a trial (non-sibcall) epilogue into a throw-away insn
   sequence; an empty sequence means no epilogue is required.  */
5942 sh_expand_epilogue (0);
5943 epilogue = get_insns ();
5945 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5947 return sh_need_epilogue_known > 0;
5950 /* Emit code to change the current function's return address to RA.
5951 TEMP is available as a scratch register, if needed. */
/* NOTE(review): gapped listing -- the return type, braces, several
   local declarations (D, RR, RR_REGNO, ENTRY, OFFSET, PR_OFFSET,
   SCHEDULE use) and some control-flow lines are elided below.  */
5954 sh_set_return_address (rtx ra, rtx tmp)
5956 HARD_REG_SET live_regs_mask;
5958 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5961 d = calc_live_regs (&live_regs_mask);
5963 /* If pr_reg isn't life, we can set it (or the register given in
5964 sh_media_register_for_return) directly. */
5965 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5971 int rr_regno = sh_media_register_for_return ();
5976 rr = gen_rtx_REG (DImode, rr_regno);
5979 rr = gen_rtx_REG (SImode, pr_reg);
5981 emit_insn (GEN_MOV (rr, ra));
5982 /* Tell flow the register for return isn't dead. */
5983 emit_insn (gen_rtx_USE (VOIDmode, rr));
/* Otherwise PR was saved on the stack.  On SH5, locate its slot via
   the save schedule; elsewhere it sits at the rounded frame size.
   Then store RA into that slot through the TMP scratch register.  */
5990 save_schedule schedule;
5993 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5994 offset = entry[1].offset;
5995 for (; entry->mode != VOIDmode; entry--)
5996 if (entry->reg == pr_reg)
5999 /* We can't find pr register. */
6003 offset = entry->offset - offset;
6004 pr_offset = (rounded_frame_size (d) + offset
6005 + SHMEDIA_REGS_STACK_ADJUST ());
6008 pr_offset = rounded_frame_size (d);
6010 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6011 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6013 tmp = gen_rtx_MEM (Pmode, tmp);
6014 emit_insn (GEN_MOV (tmp, ra));
6017 /* Clear variables at function end. */
/* NOTE(review): gapped listing -- the return type and a probable
   third (DECL) parameter of this target hook, plus the braces, are
   not visible here.  */
6020 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6021 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Reset per-function #pragma state and the cached epilogue answer so
   they do not leak into the next function compiled.  */
6023 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6024 sh_need_epilogue_known = 0;
6025 sp_switch = NULL_RTX;
/* Expand __builtin_saveregs: dump the unnamed argument registers into
   a stack buffer (floats first, then ints) and return its address.
   NOTE(review): gapped listing -- the return type, braces, several
   local declarations (REGBUF, BUFSIZE, ADDR, FPREGS, MEM, REGNO, the
   TARGET_SH5 branch structure) and many interior lines are elided.  */
6029 sh_builtin_saveregs (void)
6031 /* First unnamed integer register. */
6032 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6033 /* Number of integer registers we need to save. */
6034 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6035 /* First unnamed SFmode float reg */
6036 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6037 /* Number of SFmode float regs to save. */
6038 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6041 HOST_WIDE_INT alias_set;
/* SHcompact-style path (structure elided): fold trailing unnamed
   registers into the call cookie's stack sequence instead of storing
   them here.  */
6047 int pushregs = n_intregs;
6049 while (pushregs < NPARM_REGS (SImode) - 1
6050 && (CALL_COOKIE_INT_REG_GET
6051 (current_function_args_info.call_cookie,
6052 NPARM_REGS (SImode) - pushregs)
6055 current_function_args_info.call_cookie
6056 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6061 if (pushregs == NPARM_REGS (SImode))
6062 current_function_args_info.call_cookie
6063 |= (CALL_COOKIE_INT_REG (0, 1)
6064 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6066 current_function_args_info.call_cookie
6067 |= CALL_COOKIE_STACKSEQ (pushregs);
6069 current_function_pretend_args_size += 8 * n_intregs;
6071 if (TARGET_SHCOMPACT)
6075 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6077 error ("__builtin_saveregs not supported by this subtarget");
6084 /* Allocate block of memory for the regs. */
6085 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6086 Or can assign_stack_local accept a 0 SIZE argument? */
6087 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6090 regbuf = gen_rtx_MEM (BLKmode,
6091 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
/* With an odd float count, over-allocate a word and force the buffer
   to an odd word boundary so that DFmode pairs land 8-byte aligned.  */
6092 else if (n_floatregs & 1)
6096 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6097 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6098 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6099 regbuf = change_address (regbuf, BLKmode, addr);
6102 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6103 alias_set = get_varargs_alias_set ();
6104 set_mem_alias_set (regbuf, alias_set);
6107 This is optimized to only save the regs that are necessary. Explicitly
6108 named args need not be saved. */
/* Integer registers go after the float area in the buffer.  */
6110 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6111 adjust_address (regbuf, BLKmode,
6112 n_floatregs * UNITS_PER_WORD),
6116 /* Return the address of the regbuf. */
6117 return XEXP (regbuf, 0);
6120 This is optimized to only save the regs that are necessary. Explicitly
6121 named args need not be saved.
6122 We explicitly build a pointer to the buffer because it halves the insn
6123 count when not optimizing (otherwise the pointer is built for each reg
6125 We emit the moves in reverse order so that we can use predecrement. */
6127 fpregs = gen_reg_rtx (Pmode);
6128 emit_move_insn (fpregs, XEXP (regbuf, 0));
6129 emit_insn (gen_addsi3 (fpregs, fpregs,
6130 GEN_INT (n_floatregs * UNITS_PER_WORD)));
/* Double-capable path: store register pairs as DFmode values,
   walking the pointer downward two words at a time.  */
6134 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6136 emit_insn (gen_addsi3 (fpregs, fpregs,
6137 GEN_INT (-2 * UNITS_PER_WORD)));
6138 mem = gen_rtx_MEM (DFmode, fpregs);
6139 set_mem_alias_set (mem, alias_set);
6140 emit_move_insn (mem,
6141 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
/* Odd leftover single-precision register; the endian correction
   picks the proper half of the pair.  */
6143 regno = first_floatreg;
6146 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6147 mem = gen_rtx_MEM (SFmode, fpregs);
6148 set_mem_alias_set (mem, alias_set);
6149 emit_move_insn (mem,
6150 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6151 - (TARGET_LITTLE_ENDIAN != 0)));
/* Single-precision-only path: store SFmode registers one by one.  */
6155 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6159 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6160 mem = gen_rtx_MEM (SFmode, fpregs);
6161 set_mem_alias_set (mem, alias_set);
6162 emit_move_insn (mem,
6163 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6166 /* Return the address of the regbuf. */
6167 return XEXP (regbuf, 0);
6170 /* Define the `__builtin_va_list' type for the ABI. */
/* NOTE(review): gapped listing -- the return type, braces, the
   declaration of RECORD, the field-type arguments of the build_decl
   calls and the final "return record;" are elided below.  */
6173 sh_build_builtin_va_list (void)
6175 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
/* ABIs without the split register-save areas (SH5, pre-SH2E/SH4, and
   the Hitachi/Renesas calling conventions) use a plain pointer.  */
6178 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6179 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6180 return ptr_type_node;
6182 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
/* Five cursor fields: next overflow (int) slot and its limit, next
   FP slot and its limit, and the next stack location.  */
6184 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6186 f_next_o_limit = build_decl (FIELD_DECL,
6187 get_identifier ("__va_next_o_limit"),
6189 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6191 f_next_fp_limit = build_decl (FIELD_DECL,
6192 get_identifier ("__va_next_fp_limit"),
6194 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6197 DECL_FIELD_CONTEXT (f_next_o) = record;
6198 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6199 DECL_FIELD_CONTEXT (f_next_fp) = record;
6200 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6201 DECL_FIELD_CONTEXT (f_next_stack) = record;
/* Chain the fields in declaration order and lay the record out.  */
6203 TYPE_FIELDS (record) = f_next_o;
6204 TREE_CHAIN (f_next_o) = f_next_o_limit;
6205 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6206 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6207 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6209 layout_type (record);
6214 /* Implement `va_start' for varargs and stdarg. */
/* NOTE(review): fragmentary extraction — embedded original line numbers,
   missing lines.  Initializes the five-field SH va_list built by
   sh_build_builtin_va_list: next_fp points at the saved-FP-register block
   returned by __builtin_saveregs; the o (overflow/integer) area follows it;
   next_stack is set from NEXTARG.  Targets using the plain pointer va_list
   delegate to std_expand_builtin_va_start.  */
6217 sh_va_start (tree valist, rtx nextarg)
6219 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6220 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6226 expand_builtin_saveregs ();
6227 std_expand_builtin_va_start (valist, nextarg);
/* Non-FPU or Renesas/Hitachi ABI: plain pointer va_list, standard start.  */
6231 if ((! TARGET_SH2E && ! TARGET_SH4)
6232 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6234 std_expand_builtin_va_start (valist, nextarg);
/* Pick the five fields back out of va_list_type_node, in layout order.  */
6238 f_next_o = TYPE_FIELDS (va_list_type_node);
6239 f_next_o_limit = TREE_CHAIN (f_next_o);
6240 f_next_fp = TREE_CHAIN (f_next_o_limit);
6241 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6242 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6244 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6246 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6247 valist, f_next_o_limit, NULL_TREE);
6248 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6250 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6251 valist, f_next_fp_limit, NULL_TREE);
6252 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6253 valist, f_next_stack, NULL_TREE);
6255 /* Call __builtin_saveregs. */
6256 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6257 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6258 TREE_SIDE_EFFECTS (t) = 1;
6259 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_fp_limit = saveregs buffer + nfp remaining FP words.  */
6261 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6266 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6267 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6268 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6269 TREE_SIDE_EFFECTS (t) = 1;
6270 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* The integer-register (o) area starts right where the FP area ends.  */
6272 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6273 TREE_SIDE_EFFECTS (t) = 1;
6274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6276 nint = current_function_args_info.arg_count[SH_ARG_INT];
6281 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6282 build_int_2 (UNITS_PER_WORD * nint, 0)));
6283 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6284 TREE_SIDE_EFFECTS (t) = 1;
6285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_stack = the incoming NEXTARG rtx (first stack-passed argument).  */
6287 u = make_tree (ptr_type_node, nextarg);
6288 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6289 TREE_SIDE_EFFECTS (t) = 1;
6290 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6293 /* Implement `va_arg'. */
/* NOTE(review): fragmentary extraction — embedded original line numbers,
   missing lines (notably most `{`/`}` and several else-branches).
   Gimplifies a va_arg read: on FPU targets without the Renesas ABI it
   chooses between the fp area, the o (integer) area and the stack area of
   the record va_list, emitting compare/goto/label trees into *pre_p;
   otherwise it defers to std_gimplify_va_arg_expr.  */
6296 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6297 tree *post_p ATTRIBUTE_UNUSED)
6299 HOST_WIDE_INT size, rsize;
6300 tree tmp, pptr_type_node;
6301 tree addr, lab_over, result = NULL;
6302 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
/* Pass-by-reference arguments are fetched as a pointer to TYPE.  */
6305 type = build_pointer_type (type);
6307 size = int_size_in_bytes (type);
/* rsize: size rounded up to a whole number of argument words.  */
6308 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6309 pptr_type_node = build_pointer_type (ptr_type_node);
6311 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6312 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6312 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6314 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6315 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6319 f_next_o = TYPE_FIELDS (va_list_type_node);
6320 f_next_o_limit = TREE_CHAIN (f_next_o);
6321 f_next_fp = TREE_CHAIN (f_next_o_limit);
6322 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6323 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6325 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6327 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6328 valist, f_next_o_limit, NULL_TREE);
6329 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6330 valist, f_next_fp, NULL_TREE);
6331 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6332 valist, f_next_fp_limit, NULL_TREE);
6333 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6334 valist, f_next_stack, NULL_TREE);
6336 /* Structures with a single member with a distinct mode are passed
6337 like their member. This is relevant if the latter has a REAL_TYPE
6338 or COMPLEX_TYPE type. */
6339 if (TREE_CODE (type) == RECORD_TYPE
6340 && TYPE_FIELDS (type)
6341 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6342 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6343 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6344 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6345 type = TREE_TYPE (TYPE_FIELDS (type));
/* SH4 passes floats/doubles and float complex in FP registers; SH2E only
   single floats (the two assignments are alternate arms of a missing
   if/else on the target — TODO confirm against original).  */
6349 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6350 || (TREE_CODE (type) == COMPLEX_TYPE
6351 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6356 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6359 addr = create_tmp_var (pptr_type_node, NULL);
6360 lab_false = create_artificial_label ();
6361 lab_over = create_artificial_label ();
/* The actual load is done through *addr once a source area is chosen.  */
6363 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6368 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6369 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
/* FP path: if next_fp >= next_fp_limit, the fp area is exhausted.  */
6371 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6372 tmp = build (COND_EXPR, void_type_node, tmp,
6373 build (GOTO_EXPR, void_type_node, lab_false),
6375 gimplify_and_add (tmp, pre_p);
/* Realign next_fp when the value needs more than word alignment.  */
6377 if (TYPE_ALIGN (type) > BITS_PER_WORD
6378 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6379 && (n_floatregs & 1)))
6381 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6382 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6383 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6384 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6385 gimplify_and_add (tmp, pre_p);
6388 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6389 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6390 gimplify_and_add (tmp, pre_p);
6392 #ifdef FUNCTION_ARG_SCmode_WART
/* Little-endian SH4 stores SCmode halves swapped: read imag then real
   and rebuild the complex value.  */
6393 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6395 tree subtype = TREE_TYPE (type);
6398 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6399 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6401 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6402 real = get_initialized_tmp_var (real, pre_p, NULL);
6404 result = build (COMPLEX_EXPR, type, real, imag);
6405 result = get_initialized_tmp_var (result, pre_p, NULL);
6407 #endif /* FUNCTION_ARG_SCmode_WART */
6409 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6410 gimplify_and_add (tmp, pre_p);
/* lab_false: fp area exhausted — fall back to the stack area.  */
6412 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6413 gimplify_and_add (tmp, pre_p);
6415 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6416 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6417 gimplify_and_add (tmp, pre_p);
/* Integer path: spill to stack if next_o + rsize would pass next_o_limit.  */
6421 tmp = fold_convert (ptr_type_node, size_int (rsize));
6422 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6423 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6424 tmp = build (COND_EXPR, void_type_node, tmp,
6425 build (GOTO_EXPR, void_type_node, lab_false),
6427 gimplify_and_add (tmp, pre_p);
6429 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6430 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6431 gimplify_and_add (tmp, pre_p);
6433 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6434 gimplify_and_add (tmp, pre_p);
6436 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6437 gimplify_and_add (tmp, pre_p);
/* On non-SH4, a large value that spills closes the o area for good.  */
6439 if (size > 4 && ! TARGET_SH4)
6441 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6442 gimplify_and_add (tmp, pre_p);
6445 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6446 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6447 gimplify_and_add (tmp, pre_p);
6452 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6453 gimplify_and_add (tmp, pre_p);
6457 /* ??? In va-sh.h, there had been code to make values larger than
6458 size 8 indirect. This does not match the FUNCTION_ARG macros. */
/* Common tail: perform the standard fetch through the chosen area.  */
6460 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6463 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6464 gimplify_and_add (tmp, pre_p);
6466 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6467 gimplify_and_add (tmp, pre_p);
/* pass_by_ref case: result holds a pointer; dereference it.  */
6473 result = build_fold_indirect_ref (result);
/* NOTE(review): fragment (return type line and braces missing).  Prototype
   promotion is enabled for every ABI except Renesas: small integer args are
   promoted to full words at the call site.  */
6479 sh_promote_prototypes (tree type)
6485 return ! sh_attr_renesas_p (type);
6488 /* Define where to put the arguments to a function.
6489 Value is zero to push the argument on the stack,
6490 or a hard register in which to store the argument.
6492 MODE is the argument's machine mode.
6493 TYPE is the data type of the argument (as a tree).
6494 This is null for libcalls where that information may
6496 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6497 the preceding args and about the function being called.
6498 NAMED is nonzero if this argument is a named parameter
6499 (otherwise it is an extra parameter matching an ellipsis).
6501 On SH the first args are normally in registers
6502 and the rest are pushed. Any arg that starts within the first
6503 NPARM_REGS words is at least partially passed in a register unless
6504 its data type forbids. */
/* NOTE(review): fragmentary extraction — embedded line numbers, missing
   lines (the SH1-4 / SH5 split appears to be an if/else whose braces are
   among the missing lines).  */
6508 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6509 tree type, int named)
/* VOIDmode marks the end-of-args sentinel; encode the Renesas flag.  */
6511 if (! TARGET_SH5 && mode == VOIDmode)
6512 return GEN_INT (ca->renesas_abi ? 1 : 0);
6515 && PASS_IN_REG_P (*ca, mode, type)
6516 && (named || ! (TARGET_HITACHI || ca->renesas_abi))
6516 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
/* Little-endian SH4 SCmode workaround: build a PARALLEL that swaps the
   two SFmode halves into the expected register pair.  */
6520 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6521 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6523 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6524 gen_rtx_REG (SFmode,
6526 + (ROUND_REG (*ca, mode) ^ 1)),
6528 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6529 gen_rtx_REG (SFmode,
6531 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6533 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6536 /* If the alignment of a DF value causes an SF register to be
6537 skipped, we will use that skipped register for the next SF
6539 if ((TARGET_HITACHI || ca->renesas_abi)
6540 && ca->free_single_fp_reg
6542 return gen_rtx_REG (mode, ca->free_single_fp_reg);
/* Normal case: base register for the class plus rounded position; the
   XOR flips SF register pairing on little-endian non-Renesas SH4.  */
6544 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6545 ^ (mode == SFmode && TARGET_SH4
6546 && TARGET_LITTLE_ENDIAN != 0
6547 && ! TARGET_HITACHI && ! ca->renesas_abi);
6548 return gen_rtx_REG (mode, regno);
/* SH5 / SHcompact path below.  */
6554 if (mode == VOIDmode && TARGET_SHCOMPACT)
6555 return GEN_INT (ca->call_cookie);
6557 /* The following test assumes unnamed arguments are promoted to
6559 if (mode == SFmode && ca->free_single_fp_reg)
6560 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6562 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6563 && (named || ! ca->prototype_p)
6564 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6566 if (! ca->prototype_p && TARGET_SHMEDIA)
6567 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6569 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6571 + ca->arg_count[(int) SH_ARG_FLOAT]);
6574 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6575 && (! TARGET_SHCOMPACT
6576 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6577 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6580 return gen_rtx_REG (mode, (FIRST_PARM_REG
6581 + ca->arg_count[(int) SH_ARG_INT]));
6590 /* Update the data in CUM to advance over an argument
6591 of mode MODE and data type TYPE.
6592 (TYPE is null for libcalls where that information may not be
/* NOTE(review): fragmentary extraction — missing lines include the opening
   TARGET_SH1_ARG_REGISTER / SHcompact branch bodies and several closing
   braces.  Comments are limited to what the visible lines establish.  */
6596 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6597 tree type, int named)
6601 else if (TARGET_SH5)
/* For by-reference args, account the pointed-to type/mode instead.  */
6603 tree type2 = (ca->byref && type
6606 enum machine_mode mode2 = (ca->byref && type
/* dwords: argument size in 8-byte register slots (rounded up).  */
6609 int dwords = ((ca->byref
6612 ? int_size_in_bytes (type2)
6613 : GET_MODE_SIZE (mode2)) + 7) / 8;
6614 int numregs = MIN (dwords, NPARM_REGS (SImode)
6615 - ca->arg_count[(int) SH_ARG_INT]);
6619 ca->arg_count[(int) SH_ARG_INT] += numregs;
/* SHcompact call cookie bookkeeping: record which integer registers
   actually end up holding stack-destined words.  */
6620 if (TARGET_SHCOMPACT
6621 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6624 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6626 /* N.B. We want this also for outgoing. */
6627 ca->stack_regs += numregs;
6632 ca->stack_regs += numregs;
6633 ca->byref_regs += numregs;
6637 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6641 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
/* Argument straddles registers and stack (partial in regs).  */
6644 else if (dwords > numregs)
6646 int pushregs = numregs;
6648 if (TARGET_SHCOMPACT)
6649 ca->stack_regs += numregs;
6650 while (pushregs < NPARM_REGS (SImode) - 1
6651 && (CALL_COOKIE_INT_REG_GET
6653 NPARM_REGS (SImode) - pushregs)
6657 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6661 if (numregs == NPARM_REGS (SImode))
6663 |= CALL_COOKIE_INT_REG (0, 1)
6664 | CALL_COOKIE_STACKSEQ (numregs - 1);
6667 |= CALL_COOKIE_STACKSEQ (numregs);
/* FP-class argument accounting for SH5.  */
6670 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6671 && (named || ! ca->prototype_p))
6673 if (mode2 == SFmode && ca->free_single_fp_reg)
6674 ca->free_single_fp_reg = 0;
6675 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6676 < NPARM_REGS (SFmode))
6679 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6681 - ca->arg_count[(int) SH_ARG_FLOAT]);
6683 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6685 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6687 if (ca->outgoing && numregs > 0)
6691 |= (CALL_COOKIE_INT_REG
6692 (ca->arg_count[(int) SH_ARG_INT]
6693 - numregs + ((numfpregs - 2) / 2),
6694 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6697 while (numfpregs -= 2);
/* An odd SFmode leaves a half-pair register free for the next SF.  */
6699 else if (mode2 == SFmode && (named)
6700 && (ca->arg_count[(int) SH_ARG_FLOAT]
6701 < NPARM_REGS (SFmode)))
6702 ca->free_single_fp_reg
6703 = FIRST_FP_PARM_REG - numfpregs
6704 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
/* SH1-4 Renesas/Hitachi double-precision alignment handling.  */
6710 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6712 /* Note that we've used the skipped register. */
6713 if (mode == SFmode && ca->free_single_fp_reg)
6715 ca->free_single_fp_reg = 0;
6718 /* When we have a DF after an SF, there's an SF register that get
6719 skipped in order to align the DF value. We note this skipped
6720 register, because the next SF value will use it, and not the
6721 SF that follows the DF. */
6723 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6725 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6726 + BASE_ARG_REG (mode));
/* Default advance: bump the class counter by the rounded size.  */
6730 if (! (TARGET_SH4 || ca->renesas_abi)
6731 || PASS_IN_REG_P (*ca, mode, type))
6732 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6733 = (ROUND_REG (*ca, mode)
6735 ? ROUND_ADVANCE (int_size_in_bytes (type))
6736 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6739 /* The Renesas calling convention doesn't quite fit into this scheme since
6740 the address is passed like an invisible argument, but one that is always
6741 passed in memory. */
/* NOTE(review): fragment — the Renesas branch's return (presumably 0/NULL)
   is among the missing lines.  Non-Renesas: aggregate return address goes
   in r2.  */
6743 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6745 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6747 return gen_rtx_REG (Pmode, 2);
6750 /* Worker function for TARGET_RETURN_IN_MEMORY. */
/* NOTE(review): fragment.  Visible logic: on one path (SH5, judging by the
   8-byte limit — TODO confirm the missing condition) anything wider than
   8 bytes is returned in memory; otherwise BLKmode values and, under the
   Renesas/Hitachi ABI, all RECORD_TYPEs are.  */
6753 sh_return_in_memory (tree type, tree fndecl)
6757 if (TYPE_MODE (type) == BLKmode)
6758 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6760 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6764 return (TYPE_MODE (type) == BLKmode
6765 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6766 && TREE_CODE (type) == RECORD_TYPE));
6770 /* We actually emit the code in sh_expand_prologue. We used to use
6771 a static variable to flag that we need to emit this code, but that
6772 doesn't when inlining, when functions are deferred and then emitted
6773 later. Fortunately, we already have two flags that are part of struct
6774 function that tell if a function uses varargs or stdarg. */
/* NOTE(review): fragment.  Computes how many anonymous argument registers
   must be pretended onto the stack for a stdarg function: registers beyond
   the named parameters, 4 bytes each.  */
6776 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6777 enum machine_mode mode,
6779 int *pretend_arg_size,
6780 int second_time ATTRIBUTE_UNUSED)
6782 if (! current_function_stdarg)
6784 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6786 int named_parm_regs, anon_parm_regs;
6788 named_parm_regs = (ROUND_REG (*ca, mode)
6790 ? ROUND_ADVANCE (int_size_in_bytes (type))
6791 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6792 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6793 if (anon_parm_regs > 0)
6794 *pretend_arg_size = anon_parm_regs * 4;
/* Strict argument naming: return value line is missing (fragment).  */
6799 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
/* Pretend unnamed outgoing args are named except under Renesas/Hitachi
   ABI or on SH5.  */
6805 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6807 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6811 /* Define the offset between two registers, one to be eliminated, and
6812 the other its replacement, at the start of a routine. */
/* NOTE(review): fragment.  Computes saved-register space (including
   SHmedia target-register reservation and SH5 stack-boundary rounding),
   then returns the appropriate offset for each eliminable register pair.
   target_flags is saved/restored around calc_live_regs because that call
   can perturb it (visible in the save_flags/copy_flags dance).  */
6815 initial_elimination_offset (int from, int to)
6818 int regs_saved_rounding = 0;
6819 int total_saved_regs_space;
6820 int total_auto_space;
6821 int save_flags = target_flags;
6823 HARD_REG_SET live_regs_mask;
6825 shmedia_space_reserved_for_target_registers = false;
6826 regs_saved = calc_live_regs (&live_regs_mask);
6827 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6829 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6831 shmedia_space_reserved_for_target_registers = true;
6832 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
/* SH5 keeps the save area aligned to STACK_BOUNDARY.  */
6835 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6836 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6837 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6839 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6840 copy_flags = target_flags;
6841 target_flags = save_flags;
6843 total_saved_regs_space = regs_saved + regs_saved_rounding;
6845 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6846 return total_saved_regs_space + total_auto_space
6847 + current_function_args_info.byref_regs * 8;
6849 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6850 return total_saved_regs_space + total_auto_space
6851 + current_function_args_info.byref_regs * 8;
6853 /* Initial gap between fp and sp is 0. */
6854 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6857 if (from == RETURN_ADDRESS_POINTER_REGNUM
6858 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
/* SH5 path: walk the save schedule to find where PR was stored.  */
6862 int n = total_saved_regs_space;
6863 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6864 save_schedule schedule;
6867 n += total_auto_space;
6869 /* If it wasn't saved, there's not much we can do. */
6870 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6873 target_flags = copy_flags;
6875 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6876 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6877 if (entry->reg == pr_reg)
6879 target_flags = save_flags;
6880 return entry->offset;
6885 return total_auto_space;
6891 /* Handle machine specific pragmas to be semi-compatible with Renesas
/* NOTE(review): fragment.  Three cpp pragma callbacks simply set file-scope
   flags; sh_insert_attributes then turns a pending `#pragma interrupt' into
   an `interrupt_handler' attribute on the next function decl.  */
6895 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6897 pragma_interrupt = 1;
6901 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6903 pragma_interrupt = pragma_trapa = 1;
6907 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6909 pragma_nosave_low_regs = 1;
6912 /* Generate 'handle_interrupt' attribute for decls */
6915 sh_insert_attributes (tree node, tree *attributes)
6917 if (! pragma_interrupt
6918 || TREE_CODE (node) != FUNCTION_DECL)
6921 /* We are only interested in fields. */
6922 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6925 /* Add a 'handle_interrupt' attribute. */
6926 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6931 /* Supported attributes:
6933 interrupt_handler -- specifies this function is an interrupt handler.
6935 sp_switch -- specifies an alternate stack for an interrupt handler
6938 trap_exit -- use a trapa to exit an interrupt function instead of
6941 renesas -- use Renesas calling/layout conventions (functions and
/* NOTE(review): fragment — this is the TARGET_ATTRIBUTE_TABLE definition.
   The NULL row terminates the table.  */
6946 const struct attribute_spec sh_attribute_table[] =
6948 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6949 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6950 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6951 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6952 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6953 { NULL, 0, 0, false, false, false, NULL }
6956 /* Handle an "interrupt_handler" attribute; arguments as in
6957 struct attribute_spec.handler. */
/* NOTE(review): fragment.  The four attribute handlers below validate the
   decl/argument and set *no_add_attrs with a warning on misuse; sp_switch
   and trap_exit additionally record their argument in file-scope state
   (sp_switch rtx / trap_exit number).  */
6959 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6960 tree args ATTRIBUTE_UNUSED,
6961 int flags ATTRIBUTE_UNUSED,
6964 if (TREE_CODE (*node) != FUNCTION_DECL)
6966 warning ("`%s' attribute only applies to functions",
6967 IDENTIFIER_POINTER (name));
6968 *no_add_attrs = true;
6970 else if (TARGET_SHCOMPACT)
6972 error ("attribute interrupt_handler is not compatible with -m5-compact");
6973 *no_add_attrs = true;
6979 /* Handle an "sp_switch" attribute; arguments as in
6980 struct attribute_spec.handler. */
6982 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6983 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6985 if (TREE_CODE (*node) != FUNCTION_DECL)
6987 warning ("`%s' attribute only applies to functions",
6988 IDENTIFIER_POINTER (name));
6989 *no_add_attrs = true;
6991 else if (!pragma_interrupt)
6993 /* The sp_switch attribute only has meaning for interrupt functions. */
6994 warning ("`%s' attribute only applies to interrupt functions",
6995 IDENTIFIER_POINTER (name));
6996 *no_add_attrs = true;
6998 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7000 /* The argument must be a constant string. */
7001 warning ("`%s' attribute argument not a string constant",
7002 IDENTIFIER_POINTER (name));
7003 *no_add_attrs = true;
/* Valid: remember the alternate-stack symbol for prologue emission.  */
7007 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7008 TREE_STRING_POINTER (TREE_VALUE (args)));
7014 /* Handle an "trap_exit" attribute; arguments as in
7015 struct attribute_spec.handler. */
7017 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7018 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7020 if (TREE_CODE (*node) != FUNCTION_DECL)
7022 warning ("`%s' attribute only applies to functions",
7023 IDENTIFIER_POINTER (name));
7024 *no_add_attrs = true;
7026 else if (!pragma_interrupt)
7028 /* The trap_exit attribute only has meaning for interrupt functions. */
7029 warning ("`%s' attribute only applies to interrupt functions",
7030 IDENTIFIER_POINTER (name));
7031 *no_add_attrs = true;
7033 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7035 /* The argument must be a constant integer. */
7036 warning ("`%s' attribute argument not an integer constant",
7037 IDENTIFIER_POINTER (name));
7038 *no_add_attrs = true;
/* Valid: record the trapa vector number used to exit the handler.  */
7042 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
/* "renesas" needs no validation beyond the table constraints.  */
7049 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7050 tree name ATTRIBUTE_UNUSED,
7051 tree args ATTRIBUTE_UNUSED,
7052 int flags ATTRIBUTE_UNUSED,
7053 bool *no_add_attrs ATTRIBUTE_UNUSED)
7058 /* True if __attribute__((renesas)) or -mrenesas. */
/* NOTE(review): fragment.  sh_attr_renesas_p inspects TD's type attributes
   (after stepping from a DECL to its type — the guard lines are missing);
   the two helpers below apply the same queries to the current function.  */
7060 sh_attr_renesas_p (tree td)
7067 td = TREE_TYPE (td);
7068 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7072 /* True if __attribute__((renesas)) or -mrenesas, for the current
7075 sh_cfun_attr_renesas_p (void)
7077 return sh_attr_renesas_p (current_function_decl);
7081 sh_cfun_interrupt_handler_p (void)
7083 return (lookup_attribute ("interrupt_handler",
7084 DECL_ATTRIBUTES (current_function_decl))
7088 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
/* NOTE(review): fragment.  A local copy of the target_switches table so PCH
   validation can name the differing -m flag.  */
7091 const char *const name;
7093 const char *const description;
7095 sh_target_switches[] = TARGET_SWITCHES;
7096 #define target_switches sh_target_switches
7098 /* Like default_pch_valid_p, but take flag_mask into account. */
/* Validates a precompiled header against the current flags: -fpic/-fpie
   must match byte-for-byte, then target_flags (masked to the CPU/ABI bits
   in flag_mask) and every TARGET_OPTIONS string must agree; on mismatch an
   explanatory message naming the flag is returned.  */
7100 sh_pch_valid_p (const void *data_p, size_t len)
7102 const char *data = (const char *)data_p;
7103 const char *flag_that_differs = NULL;
7107 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7108 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7110 /* -fpic and -fpie also usually make a PCH invalid. */
7111 if (data[0] != flag_pic)
7112 return _("created and used with different settings of -fpic");
7113 if (data[1] != flag_pie)
7114 return _("created and used with different settings of -fpie");
7117 /* Check target_flags. */
7118 memcpy (&old_flags, data, sizeof (target_flags));
7119 if (((old_flags ^ target_flags) & flag_mask) != 0)
/* Find which switch owns the differing bits, for the message.  */
7121 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7125 bits = target_switches[i].value;
7129 if ((target_flags & bits) != (old_flags & bits))
7131 flag_that_differs = target_switches[i].name;
7137 data += sizeof (target_flags);
7138 len -= sizeof (target_flags);
7140 /* Check string options. */
7141 #ifdef TARGET_OPTIONS
7142 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7144 const char *str = *target_options[i].variable;
7148 l = strlen (str) + 1;
7149 if (len < l || memcmp (data, str, l) != 0)
7151 flag_that_differs = target_options[i].prefix;
7164 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7167 return _("out of memory");
7172 /* Predicates used by the templates. */
7174 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7175 Used only in general_movsrc_operand. */
/* NOTE(review): fragment — the body of system_reg_operand (a switch on
   REGNO, presumably) is among the missing lines.  */
7178 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7190 /* Returns 1 if OP can be source of a simple move operation.
7191 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7192 invalid as are subregs of system registers. */
7195 general_movsrc_operand (rtx op, enum machine_mode mode)
7197 if (GET_CODE (op) == MEM)
7199 rtx inside = XEXP (op, 0);
7200 if (GET_CODE (inside) == CONST)
7201 inside = XEXP (inside, 0);
/* Accept (mem (label_ref)) and (mem (label_ref + const)) — constant-pool
   style addresses general_operand would reject.  */
7203 if (GET_CODE (inside) == LABEL_REF)
7206 if (GET_CODE (inside) == PLUS
7207 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7208 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7211 /* Only post inc allowed. */
7212 if (GET_CODE (inside) == PRE_DEC)
/* Reject QI/HI subregs of system registers (MACH/MACL/PR).  */
7216 if ((mode == QImode || mode == HImode)
7217 && (GET_CODE (op) == SUBREG
7218 && GET_CODE (XEXP (op, 0)) == REG
7219 && system_reg_operand (XEXP (op, 0), mode)))
7222 return general_operand (op, mode);
7225 /* Returns 1 if OP can be a destination of a move.
7226 Same as general_operand, but no preinc allowed. */
7229 general_movdst_operand (rtx op, enum machine_mode mode)
7231 /* Only pre dec allowed. */
7232 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7235 return general_operand (op, mode);
7238 /* Returns 1 if OP is a normal arithmetic register. */
/* NOTE(review): fragment.  arith_reg_operand excludes the special registers
   (T, PR, MACH, MACL, branch-target regs, and FPUL except on SH4) from the
   set register_operand accepts.  */
7241 arith_reg_operand (rtx op, enum machine_mode mode)
7243 if (register_operand (op, mode))
7247 if (GET_CODE (op) == REG)
7249 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7250 regno = REGNO (SUBREG_REG (op));
7254 return (regno != T_REG && regno != PR_REG
7255 && ! TARGET_REGISTER_P (regno)
7256 && (regno != FPUL_REG || TARGET_SH4)
7257 && regno != MACH_REG && regno != MACL_REG);
7262 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7263 because this would lead to missing sign extensions when truncating from
7264 DImode to SImode. */
7266 arith_reg_dest (rtx op, enum machine_mode mode)
7268 if (mode == DImode && GET_CODE (op) == SUBREG
7269 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7271 return arith_reg_operand (op, mode);
/* True for sub-word integer destinations in general registers; only
   meaningful after reload (true_regnum on hard regs).  */
7275 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7277 enum machine_mode op_mode = GET_MODE (op);
7279 if (GET_MODE_CLASS (op_mode) != MODE_INT
7280 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7282 if (! reload_completed)
7284 return true_regnum (op) <= LAST_GENERAL_REG;
/* FP registers or (before reload) pseudos.  */
7288 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7290 if (register_operand (op, mode))
7294 if (GET_CODE (op) == REG)
7296 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7297 regno = REGNO (SUBREG_REG (op));
7301 return (regno >= FIRST_PSEUDO_REGISTER
7302 || FP_REGISTER_P (regno));
7307 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
/* NOTE(review): fragment.  Register, or an immediate fitting the target's
   add-immediate constraint (SHmedia vs. SH1-4 paths both visible; the
   branch structure between them is among the missing lines).  */
7310 arith_operand (rtx op, enum machine_mode mode)
7312 if (arith_reg_operand (op, mode))
7317 /* FIXME: We should be checking whether the CONST_INT fits in a
7318 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7319 attempting to transform a sequence of two 64-bit sets of the
7320 same register from literal constants into a set and an add,
7321 when the difference is too wide for an add. */
7322 if (GET_CODE (op) == CONST_INT
7323 || EXTRA_CONSTRAINT_C16 (op))
7328 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7334 /* Returns 1 if OP is a valid source operand for a compare insn. */
7337 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7339 if (arith_reg_operand (op, mode))
7342 if (EXTRA_CONSTRAINT_Z (op))
7348 /* Return 1 if OP is a valid source operand for an SHmedia operation
7349 that takes either a register or a 6-bit immediate. */
7352 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7354 return (arith_reg_operand (op, mode)
7355 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7358 /* Returns 1 if OP is a valid source operand for a logical operation. */
/* Register, or I10 (SHmedia) / K08 (SH1-4) logical immediate.  */
7361 logical_operand (rtx op, enum machine_mode mode)
7363 if (arith_reg_operand (op, mode))
7368 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7373 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
/* logical_operand plus J16 masks usable by mshflo.l/mshfhi.l.  */
7380 and_operand (rtx op, enum machine_mode mode)
7382 if (logical_operand (op, mode))
7385 /* Check mshflo.l / mshflhi.l opportunities. */
7388 && GET_CODE (op) == CONST_INT
7389 && CONST_OK_FOR_J16 (INTVAL (op))
7389 && CONST_OK_FOR_J16 (INTVAL (op)))
7395 /* Nonzero if OP is a floating point value with value 0.0. */
/* NOTE(review): fragment.  Matches SFmode +0.0 only — minus zero is
   explicitly rejected because fldi0 loads positive zero.  */
7398 fp_zero_operand (rtx op)
7402 if (GET_MODE (op) != SFmode)
7405 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7406 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7409 /* Nonzero if OP is a floating point value with value 1.0. */
7412 fp_one_operand (rtx op)
7416 if (GET_MODE (op) != SFmode)
7419 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7420 return REAL_VALUES_EQUAL (r, dconst1);
7423 /* For -m4 and -m4-single-only, mode switching is used. If we are
7424 compiling without -mfmovd, movsf_ie isn't taken into account for
7425 mode switching. We could check in machine_dependent_reorg for
7426 cases where we know we are in single precision mode, but there is
7427 interface to find that out during reload, so we must avoid
7428 choosing an fldi alternative during reload and thus failing to
7429 allocate a scratch register for the constant loading. */
7433 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
/* A tertiary reload source: memory, or (SH4) a CONST_DOUBLE.  */
7437 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7439 enum rtx_code code = GET_CODE (op);
7440 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Exactly the FPSCR hard register in PSImode.  */
7444 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7446 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7447 && GET_MODE (op) == PSImode);
/* FPUL (or a pseudo) in MODE; delegates to fp_arith_reg_operand on one
   path whose guard line is missing (TODO confirm: likely TARGET_SH4).  */
7451 fpul_operand (rtx op, enum machine_mode mode)
7454 return fp_arith_reg_operand (op, mode);
7456 return (GET_CODE (op) == REG
7457 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7458 && GET_MODE (op) == mode);
/* NOTE(review): fragment.  Simple rtx-shape predicates; each switch body's
   case labels are among the missing lines, so only the guards are visible.  */
7462 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7464 return (GET_CODE (op) == SYMBOL_REF);
7467 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7469 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7471 if (GET_CODE (op) != SYMBOL_REF)
7473 return SYMBOL_REF_TLS_MODEL (op);
/* Commutative FP ops (plus/mult — cases missing from extraction).  */
7477 commutative_float_operator (rtx op, enum machine_mode mode)
7479 if (GET_MODE (op) != mode)
7481 switch (GET_CODE (op))
7493 noncommutative_float_operator (rtx op, enum machine_mode mode)
7495 if (GET_MODE (op) != mode)
7497 switch (GET_CODE (op))
7509 unary_float_operator (rtx op, enum machine_mode mode)
7511 if (GET_MODE (op) != mode)
7513 switch (GET_CODE (op))
7526 binary_float_operator (rtx op, enum machine_mode mode)
7528 if (GET_MODE (op) != mode)
7530 switch (GET_CODE (op))
7544 binary_logical_operator (rtx op, enum machine_mode mode)
7546 if (GET_MODE (op) != mode)
7548 switch (GET_CODE (op))
/* EQ/NE comparison in MODE (or any mode when MODE is VOIDmode).  */
7561 equality_comparison_operator (rtx op, enum machine_mode mode)
7563 return ((mode == VOIDmode || GET_MODE (op) == mode)
7564 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7568 greater_comparison_operator (rtx op, enum machine_mode mode)
7570 if (mode != VOIDmode && GET_MODE (op) == mode)
7572 switch (GET_CODE (op))
7585 less_comparison_operator (rtx op, enum machine_mode mode)
7587 if (mode != VOIDmode && GET_MODE (op) == mode)
7589 switch (GET_CODE (op))
7601 /* Accept pseudos and branch target registers. */
7603 target_reg_operand (rtx op, enum machine_mode mode)
7606 || GET_MODE (op) != DImode)
7609 if (GET_CODE (op) == SUBREG)
7612 if (GET_CODE (op) != REG)
7615 /* We must protect ourselves from matching pseudos that are virtual
7616 register, because they will eventually be replaced with hardware
7617 registers that aren't branch-target registers. */
7618 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7619 || TARGET_REGISTER_P (REGNO (op)))
7625 /* Same as target_reg_operand, except that label_refs and symbol_refs
7626 are accepted before reload. */
7628 target_operand (rtx op, enum machine_mode mode)
7633 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7634 && EXTRA_CONSTRAINT_Csy (op))
7635 return ! reload_completed;
7637 return target_reg_operand (op, mode);
7641 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7645 if (GET_CODE (op) != CONST_INT)
7648 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7652 extend_reg_operand (rtx op, enum machine_mode mode)
7654 return (GET_CODE (op) == TRUNCATE
7656 : arith_reg_operand) (op, mode);
7660 trunc_hi_operand (rtx op, enum machine_mode mode)
7662 enum machine_mode op_mode = GET_MODE (op);
7664 if (op_mode != SImode && op_mode != DImode
7665 && op_mode != V4HImode && op_mode != V2SImode)
7667 return extend_reg_operand (op, mode);
7671 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7673 return (GET_CODE (op) == TRUNCATE
7675 : arith_reg_or_0_operand) (op, mode);
7679 general_extend_operand (rtx op, enum machine_mode mode)
7681 return (GET_CODE (op) == TRUNCATE
7683 : nonimmediate_operand) (op, mode);
7687 inqhi_operand (rtx op, enum machine_mode mode)
7689 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7692 /* Can't use true_regnum here because copy_cost wants to know about
7693 SECONDARY_INPUT_RELOAD_CLASS. */
7694 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7698 sh_rep_vec (rtx v, enum machine_mode mode)
7703 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7704 || (GET_MODE (v) != mode && mode != VOIDmode))
7706 i = XVECLEN (v, 0) - 2;
7707 x = XVECEXP (v, 0, i + 1);
7708 if (GET_MODE_UNIT_SIZE (mode) == 1)
7710 y = XVECEXP (v, 0, i);
7711 for (i -= 2; i >= 0; i -= 2)
7712 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7713 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7718 if (XVECEXP (v, 0, i) != x)
7723 /* Determine if V is a constant vector matching MODE with only one element
7724 that is not a sign extension. Two byte-sized elements count as one. */
7726 sh_1el_vec (rtx v, enum machine_mode mode)
7729 int i, last, least, sign_ix;
7732 if (GET_CODE (v) != CONST_VECTOR
7733 || (GET_MODE (v) != mode && mode != VOIDmode))
7735 /* Determine numbers of last and of least significant elements. */
7736 last = XVECLEN (v, 0) - 1;
7737 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7738 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7741 if (GET_MODE_UNIT_SIZE (mode) == 1)
7742 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7743 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7745 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7746 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7747 ? constm1_rtx : const0_rtx);
7748 i = XVECLEN (v, 0) - 1;
7750 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7757 sh_const_vec (rtx v, enum machine_mode mode)
7761 if (GET_CODE (v) != CONST_VECTOR
7762 || (GET_MODE (v) != mode && mode != VOIDmode))
7764 i = XVECLEN (v, 0) - 1;
7766 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7771 /* Return the destination address of a branch. */
7774 branch_dest (rtx branch)
7776 rtx dest = SET_SRC (PATTERN (branch));
7779 if (GET_CODE (dest) == IF_THEN_ELSE)
7780 dest = XEXP (dest, 1);
7781 dest = XEXP (dest, 0);
7782 dest_uid = INSN_UID (dest);
7783 return INSN_ADDRESSES (dest_uid);
7786 /* Return nonzero if REG is not used after INSN.
7787 We assume REG is a reload reg, and therefore does
7788 not live past labels. It may live past calls or jumps though. */
7790 reg_unused_after (rtx reg, rtx insn)
7795 /* If the reg is set by this instruction, then it is safe for our
7796 case. Disregard the case where this is a store to memory, since
7797 we are checking a register used in the store address. */
7798 set = single_set (insn);
7799 if (set && GET_CODE (SET_DEST (set)) != MEM
7800 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7803 while ((insn = NEXT_INSN (insn)))
7809 code = GET_CODE (insn);
7812 /* If this is a label that existed before reload, then the register
7813 is dead here. However, if this is a label added by reorg, then
7814 the register may still be live here. We can't tell the difference,
7815 so we just ignore labels completely. */
7816 if (code == CODE_LABEL)
7821 if (code == JUMP_INSN)
7824 /* If this is a sequence, we must handle them all at once.
7825 We could have for instance a call that sets the target register,
7826 and an insn in a delay slot that uses the register. In this case,
7827 we must return 0. */
7828 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7833 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7835 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7836 rtx set = single_set (this_insn);
7838 if (GET_CODE (this_insn) == CALL_INSN)
7840 else if (GET_CODE (this_insn) == JUMP_INSN)
7842 if (INSN_ANNULLED_BRANCH_P (this_insn))
7847 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7849 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7851 if (GET_CODE (SET_DEST (set)) != MEM)
7857 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7862 else if (code == JUMP_INSN)
7866 set = single_set (insn);
7867 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7869 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7870 return GET_CODE (SET_DEST (set)) != MEM;
7871 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7874 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7882 static GTY(()) rtx fpscr_rtx;
7884 get_fpscr_rtx (void)
7888 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7889 REG_USERVAR_P (fpscr_rtx) = 1;
7890 mark_user_reg (fpscr_rtx);
7892 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7893 mark_user_reg (fpscr_rtx);
7898 emit_sf_insn (rtx pat)
7904 emit_df_insn (rtx pat)
7910 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7912 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7916 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7918 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7923 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7925 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7929 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7931 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7935 /* ??? gcc does flow analysis strictly after common subexpression
7936 elimination. As a result, common subexpression elimination fails
7937 when there are some intervening statements setting the same register.
7938 If we did nothing about this, this would hurt the precision switching
7939 for SH4 badly. There is some cse after reload, but it is unable to
7940 undo the extra register pressure from the unused instructions, and
7941 it cannot remove auto-increment loads.
7943 A C code example that shows this flow/cse weakness for (at least) SH
7944 and sparc (as of gcc ss-970706) is this:
7958 So we add another pass before common subexpression elimination, to
7959 remove assignments that are dead due to a following assignment in the
7960 same basic block. */
7963 mark_use (rtx x, rtx *reg_set_block)
7969 code = GET_CODE (x);
7974 int regno = REGNO (x);
7975 int nregs = (regno < FIRST_PSEUDO_REGISTER
7976 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7980 reg_set_block[regno + nregs - 1] = 0;
7987 rtx dest = SET_DEST (x);
7989 if (GET_CODE (dest) == SUBREG)
7990 dest = SUBREG_REG (dest);
7991 if (GET_CODE (dest) != REG)
7992 mark_use (dest, reg_set_block);
7993 mark_use (SET_SRC (x), reg_set_block);
8000 const char *fmt = GET_RTX_FORMAT (code);
8002 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8005 mark_use (XEXP (x, i), reg_set_block);
8006 else if (fmt[i] == 'E')
8007 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8008 mark_use (XVECEXP (x, i, j), reg_set_block);
8015 static rtx get_free_reg (HARD_REG_SET);
8017 /* This function returns a register to use to load the address to load
8018 the fpscr from. Currently it always returns r1 or r7, but when we are
8019 able to use pseudo registers after combine, or have a better mechanism
8020 for choosing a register, it should be done here. */
8021 /* REGS_LIVE is the liveness information for the point for which we
8022 need this allocation. In some bare-bones exit blocks, r1 is live at the
8023 start. We can even have all of r0..r3 being live:
8024 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8025 INSN before which new insns are placed with will clobber the register
8026 we return. If a basic block consists only of setting the return value
8027 register to a pseudo and using that register, the return value is not
8028 live before or after this block, yet we'll insert our insns right in
8032 get_free_reg (HARD_REG_SET regs_live)
8034 if (! TEST_HARD_REG_BIT (regs_live, 1))
8035 return gen_rtx_REG (Pmode, 1);
8037 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8038 there shouldn't be anything but a jump before the function end. */
8039 if (! TEST_HARD_REG_BIT (regs_live, 7))
8040 return gen_rtx_REG (Pmode, 7);
8045 /* This function will set the fpscr from memory.
8046 MODE is the mode we are setting it to. */
8048 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8050 enum attr_fp_mode fp_mode = mode;
8051 rtx addr_reg = get_free_reg (regs_live);
8053 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8054 emit_insn (gen_fpu_switch1 (addr_reg));
8056 emit_insn (gen_fpu_switch0 (addr_reg));
8059 /* Is the given character a logical line separator for the assembler? */
8060 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8061 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8065 sh_insn_length_adjustment (rtx insn)
8067 /* Instructions with unfilled delay slots take up an extra two bytes for
8068 the nop in the delay slot. */
8069 if (((GET_CODE (insn) == INSN
8070 && GET_CODE (PATTERN (insn)) != USE
8071 && GET_CODE (PATTERN (insn)) != CLOBBER)
8072 || GET_CODE (insn) == CALL_INSN
8073 || (GET_CODE (insn) == JUMP_INSN
8074 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8075 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8076 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8077 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8080 /* SH2e has a bug that prevents the use of annulled branches, so if
8081 the delay slot is not filled, we'll have to put a NOP in it. */
8082 if (sh_cpu == CPU_SH2E
8083 && GET_CODE (insn) == JUMP_INSN
8084 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8085 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8086 && get_attr_type (insn) == TYPE_CBRANCH
8087 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8090 /* sh-dsp parallel processing insn take four bytes instead of two. */
8092 if (GET_CODE (insn) == INSN)
8095 rtx body = PATTERN (insn);
8096 const char *template;
8098 int maybe_label = 1;
8100 if (GET_CODE (body) == ASM_INPUT)
8101 template = XSTR (body, 0);
8102 else if (asm_noperands (body) >= 0)
8104 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8113 while (c == ' ' || c == '\t');
8114 /* all sh-dsp parallel-processing insns start with p.
8115 The only non-ppi sh insn starting with p is pref.
8116 The only ppi starting with pr is prnd. */
8117 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8119 /* The repeat pseudo-insn expands to three insns, a total of
8120 six bytes in size. */
8121 else if ((c == 'r' || c == 'R')
8122 && ! strncasecmp ("epeat", template, 5))
8124 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8126 /* If this is a label, it is obviously not a ppi insn. */
8127 if (c == ':' && maybe_label)
8132 else if (c == '\'' || c == '"')
8137 maybe_label = c != ':';
8145 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8146 isn't protected by a PIC unspec. */
8148 nonpic_symbol_mentioned_p (rtx x)
8150 register const char *fmt;
8153 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8154 || GET_CODE (x) == PC)
8157 /* We don't want to look into the possible MEM location of a
8158 CONST_DOUBLE, since we're not going to use it, in general. */
8159 if (GET_CODE (x) == CONST_DOUBLE)
8162 if (GET_CODE (x) == UNSPEC
8163 && (XINT (x, 1) == UNSPEC_PIC
8164 || XINT (x, 1) == UNSPEC_GOT
8165 || XINT (x, 1) == UNSPEC_GOTOFF
8166 || XINT (x, 1) == UNSPEC_GOTPLT
8167 || XINT (x, 1) == UNSPEC_GOTTPOFF
8168 || XINT (x, 1) == UNSPEC_DTPOFF
8169 || XINT (x, 1) == UNSPEC_PLT))
8172 fmt = GET_RTX_FORMAT (GET_CODE (x));
8173 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8179 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8180 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8183 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8190 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8191 @GOTOFF in `reg'. */
8193 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8196 if (tls_symbolic_operand (orig, Pmode))
8199 if (GET_CODE (orig) == LABEL_REF
8200 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8203 reg = gen_reg_rtx (Pmode);
8205 emit_insn (gen_symGOTOFF2reg (reg, orig));
8208 else if (GET_CODE (orig) == SYMBOL_REF)
8211 reg = gen_reg_rtx (Pmode);
8213 emit_insn (gen_symGOT2reg (reg, orig));
8219 /* Mark the use of a constant in the literal table. If the constant
8220 has multiple labels, make it unique. */
8222 mark_constant_pool_use (rtx x)
8224 rtx insn, lab, pattern;
8229 switch (GET_CODE (x))
8239 /* Get the first label in the list of labels for the same constant
8240 and delete the other labels in the list. */
8242 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8244 if (GET_CODE (insn) != CODE_LABEL
8245 || LABEL_REFS (insn) != NEXT_INSN (insn))
8250 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8251 INSN_DELETED_P (insn) = 1;
8253 /* Mark constants in a window. */
8254 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8256 if (GET_CODE (insn) != INSN)
8259 pattern = PATTERN (insn);
8260 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8263 switch (XINT (pattern, 1))
8265 case UNSPECV_CONST2:
8266 case UNSPECV_CONST4:
8267 case UNSPECV_CONST8:
8268 XVECEXP (pattern, 0, 1) = const1_rtx;
8270 case UNSPECV_WINDOW_END:
8271 if (XVECEXP (pattern, 0, 0) == x)
8274 case UNSPECV_CONST_END:
8284 /* Return true if it's possible to redirect BRANCH1 to the destination
8285 of an unconditional jump BRANCH2. We only want to do this if the
8286 resulting branch will have a short displacement. */
8288 sh_can_redirect_branch (rtx branch1, rtx branch2)
8290 if (flag_expensive_optimizations && simplejump_p (branch2))
8292 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8296 for (distance = 0, insn = NEXT_INSN (branch1);
8297 insn && distance < 256;
8298 insn = PREV_INSN (insn))
8303 distance += get_attr_length (insn);
8305 for (distance = 0, insn = NEXT_INSN (branch1);
8306 insn && distance < 256;
8307 insn = NEXT_INSN (insn))
8312 distance += get_attr_length (insn);
8318 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8320 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8321 unsigned int new_reg)
8323 /* Interrupt functions can only use registers that have already been
8324 saved by the prologue, even if they would normally be
8327 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8333 /* Function to update the integer COST
8334 based on the relationship between INSN that is dependent on
8335 DEP_INSN through the dependence LINK. The default is to make no
8336 adjustment to COST. This can be used for example to specify to
8337 the scheduler that an output- or anti-dependence does not incur
8338 the same cost as a data-dependence. The return value should be
8339 the new value for COST. */
8341 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8347 /* On SHmedia, if the dependence is an anti-dependence or
8348 output-dependence, there is no cost. */
8349 if (REG_NOTE_KIND (link) != 0)
8352 if (get_attr_is_mac_media (insn)
8353 && get_attr_is_mac_media (dep_insn))
8356 else if (REG_NOTE_KIND (link) == 0)
8358 enum attr_type dep_type, type;
8360 if (recog_memoized (insn) < 0
8361 || recog_memoized (dep_insn) < 0)
8364 dep_type = get_attr_type (dep_insn);
8365 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8367 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8368 && (type = get_attr_type (insn)) != TYPE_CALL
8369 && type != TYPE_SFUNC)
8372 /* The only input for a call that is timing-critical is the
8373 function's address. */
8374 if (GET_CODE(insn) == CALL_INSN)
8376 rtx call = PATTERN (insn);
8378 if (GET_CODE (call) == PARALLEL)
8379 call = XVECEXP (call, 0 ,0);
8380 if (GET_CODE (call) == SET)
8381 call = SET_SRC (call);
8382 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8383 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8386 /* Likewise, the most timing critical input for an sfuncs call
8387 is the function address. However, sfuncs typically start
8388 using their arguments pretty quickly.
8389 Assume a four cycle delay before they are needed. */
8390 /* All sfunc calls are parallels with at least four components.
8391 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8392 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8393 && XVECLEN (PATTERN (insn), 0) >= 4
8394 && (reg = sfunc_uses_reg (insn)))
8396 if (! reg_set_p (reg, dep_insn))
8399 /* When the preceding instruction loads the shift amount of
8400 the following SHAD/SHLD, the latency of the load is increased
8403 && get_attr_type (insn) == TYPE_DYN_SHIFT
8404 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8405 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8406 XEXP (SET_SRC (single_set (insn)),
8409 /* When an LS group instruction with a latency of less than
8410 3 cycles is followed by a double-precision floating-point
8411 instruction, FIPR, or FTRV, the latency of the first
8412 instruction is increased to 3 cycles. */
8414 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8415 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8417 /* The lsw register of a double-precision computation is ready one
8419 else if (reload_completed
8420 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8421 && (use_pat = single_set (insn))
8422 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8426 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8427 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8430 /* An anti-dependence penalty of two applies if the first insn is a double
8431 precision fadd / fsub / fmul. */
8432 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8433 && recog_memoized (dep_insn) >= 0
8434 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8435 /* A lot of alleged anti-flow dependences are fake,
8436 so check this one is real. */
8437 && flow_dependent_p (dep_insn, insn))
8444 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8445 if DEP_INSN is anti-flow dependent on INSN. */
8447 flow_dependent_p (rtx insn, rtx dep_insn)
8449 rtx tmp = PATTERN (insn);
8451 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8452 return tmp == NULL_RTX;
8455 /* A helper function for flow_dependent_p called through note_stores. */
8457 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8459 rtx * pinsn = (rtx *) data;
8461 if (*pinsn && reg_referenced_p (x, *pinsn))
8465 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8466 'special function' patterns (type sfunc) that clobber pr, but that
8467 do not look like function calls to leaf_function_p. Hence we must
8468 do this extra check. */
8472 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8475 /* This function returns nonzero if the DFA based scheduler interface
8476 is to be used. At present this is only supported properly for the SH4.
8477 For the SH1 the current DFA model is just the converted form of the old
8478 pipeline model description. */
8480 sh_use_dfa_interface (void)
8488 /* This function returns "2" to indicate dual issue for the SH4
8489 processor. To be used by the DFA pipeline description. */
8491 sh_issue_rate (void)
8493 if (TARGET_SUPERSCALAR)
8499 /* Functions for ready queue reordering for sched1. */
8501 /* Get weight for mode for a set x. */
8503 find_set_regmode_weight (rtx x, enum machine_mode mode)
8505 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8507 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8509 if (GET_CODE (SET_DEST (x)) == REG)
8511 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8521 /* Get regmode weight for insn. */
8523 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8525 short reg_weight = 0;
8528 /* Increment weight for each register born here. */
8530 reg_weight += find_set_regmode_weight (x, mode);
8531 if (GET_CODE (x) == PARALLEL)
8534 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8536 x = XVECEXP (PATTERN (insn), 0, j);
8537 reg_weight += find_set_regmode_weight (x, mode);
8540 /* Decrement weight for each register that dies here. */
8541 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8543 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8545 rtx note = XEXP (x, 0);
8546 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8553 /* Calculate regmode weights for all insns of a basic block. */
8555 find_regmode_weight (int b, enum machine_mode mode)
8557 rtx insn, next_tail, head, tail;
8559 get_block_head_tail (b, &head, &tail);
8560 next_tail = NEXT_INSN (tail);
8562 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8564 /* Handle register life information. */
8569 INSN_REGMODE_WEIGHT (insn, mode) =
8570 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8571 else if (mode == SImode)
8572 INSN_REGMODE_WEIGHT (insn, mode) =
8573 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8577 /* Comparison function for ready queue sorting. */
8579 rank_for_reorder (const void *x, const void *y)
8581 rtx tmp = *(const rtx *) y;
8582 rtx tmp2 = *(const rtx *) x;
8584 /* The insn in a schedule group should be issued the first. */
8585 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8586 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8588 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8589 minimizes instruction movement, thus minimizing sched's effect on
8590 register pressure. */
8591 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8594 /* Resort the array A in which only element at index N may be out of order. */
8596 swap_reorder (rtx *a, int n)
8598 rtx insn = a[n - 1];
8601 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8609 #define SCHED_REORDER(READY, N_READY) \
8612 if ((N_READY) == 2) \
8613 swap_reorder (READY, N_READY); \
8614 else if ((N_READY) > 2) \
8615 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8619 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8622 ready_reorder (rtx *ready, int nready)
8624 SCHED_REORDER (ready, nready);
8627 /* Calculate regmode weights for all insns of all basic block. */
8629 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8630 int verbose ATTRIBUTE_UNUSED,
8635 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8636 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8638 FOR_EACH_BB_REVERSE (b)
8640 find_regmode_weight (b->index, SImode);
8641 find_regmode_weight (b->index, SFmode);
8644 CURR_REGMODE_PRESSURE (SImode) = 0;
8645 CURR_REGMODE_PRESSURE (SFmode) = 0;
8651 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8652 int verbose ATTRIBUTE_UNUSED)
8654 if (regmode_weight[0])
8656 free (regmode_weight[0]);
8657 regmode_weight[0] = NULL;
8659 if (regmode_weight[1])
8661 free (regmode_weight[1]);
8662 regmode_weight[1] = NULL;
8666 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8667 keep count of register pressures on SImode and SFmode. */
8669 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8670 int sched_verbose ATTRIBUTE_UNUSED,
8674 if (GET_CODE (PATTERN (insn)) != USE
8675 && GET_CODE (PATTERN (insn)) != CLOBBER)
8676 cached_can_issue_more = can_issue_more - 1;
8678 cached_can_issue_more = can_issue_more;
8680 if (reload_completed)
8681 return cached_can_issue_more;
8683 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8684 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8686 return cached_can_issue_more;
8690 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8691 int verbose ATTRIBUTE_UNUSED,
8692 int veclen ATTRIBUTE_UNUSED)
8694 CURR_REGMODE_PRESSURE (SImode) = 0;
8695 CURR_REGMODE_PRESSURE (SFmode) = 0;
8698 /* Some magic numbers. */
8699 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8700 functions that already have high pressure on r0. */
8701 #define R0_MAX_LIFE_REGIONS 2
8702 #define R0_MAX_LIVE_LENGTH 12
8703 /* Register Pressure thresholds for SImode and SFmode registers. */
8704 #define SIMODE_MAX_WEIGHT 5
8705 #define SFMODE_MAX_WEIGHT 10
8707 /* Return true if the pressure is high for MODE. */
8709 high_pressure (enum machine_mode mode)
8711 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8712 functions that already have high pressure on r0. */
8713 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8714 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8718 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8720 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8723 /* Reorder ready queue if register pressure is high. */
8725 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8726 int sched_verbose ATTRIBUTE_UNUSED,
8729 int clock_var ATTRIBUTE_UNUSED)
8731 if (reload_completed)
8732 return sh_issue_rate ();
8734 if (high_pressure (SFmode) || high_pressure (SImode))
8736 ready_reorder (ready, *n_readyp);
8739 return sh_issue_rate ();
8742 /* Skip cycles if the current register pressure is high. */
8744 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8745 int sched_verbose ATTRIBUTE_UNUSED,
8746 rtx *ready ATTRIBUTE_UNUSED,
8747 int *n_readyp ATTRIBUTE_UNUSED,
8748 int clock_var ATTRIBUTE_UNUSED)
8750 if (reload_completed)
8751 return cached_can_issue_more;
8753 if (high_pressure(SFmode) || high_pressure (SImode))
8756 return cached_can_issue_more;
8759 /* Skip cycles without sorting the ready queue. This will move insn from
8760 Q->R. If this is the last cycle we are skipping, allow sorting of ready
8761 queue by sh_reorder. */
8763 /* Generally, skipping this many cycles is sufficient for all insns to move
8768 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8769 int sched_verbose ATTRIBUTE_UNUSED,
8770 rtx insn ATTRIBUTE_UNUSED,
8775 if (reload_completed)
8780 if ((clock_var - last_clock_var) < MAX_SKIPS)
8785 /* If this is the last cycle we are skipping, allow reordering of R. */
8786 if ((clock_var - last_clock_var) == MAX_SKIPS)
8798 /* SHmedia requires registers for branches, so we can't generate new
8799 branches past reload. */
8801 sh_cannot_modify_jumps_p (void)
8803 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8807 sh_target_reg_class (void)
8809 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8813 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8815 return (shmedia_space_reserved_for_target_registers
8816 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8820 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8822 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8826 On the SH1..SH4, the trampoline looks like
8827 2 0002 D202 mov.l l2,r2
8828 1 0000 D301 mov.l l1,r3
8831 5 0008 00000000 l1: .long area
8832 6 000c 00000000 l2: .long function
8834 SH5 (compact) uses r1 instead of r3 for the static chain. */
8837 /* Emit RTL insns to initialize the variable parts of a trampoline.
8838 FNADDR is an RTX for the address of the function's pure code.
8839 CXT is an RTX for the static chain value for the function. */
8842 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8844 if (TARGET_SHMEDIA64)
8849 rtx movi1 = GEN_INT (0xcc000010);
8850 rtx shori1 = GEN_INT (0xc8000010);
8853 /* The following trampoline works within a +- 128 KB range for cxt:
8854 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8855 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8856 gettr tr1,r1; blink tr0,r63 */
8857 /* Address rounding makes it hard to compute the exact bounds of the
8858 offset for this trampoline, but we have a rather generous offset
8859 range, so frame_offset should do fine as an upper bound. */
8860 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8862 /* ??? could optimize this trampoline initialization
8863 by writing DImode words with two insns each. */
8864 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8865 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8866 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8867 insn = gen_rtx_AND (DImode, insn, mask);
8868 /* Or in ptb/u .,tr1 pattern */
8869 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8870 insn = force_operand (insn, NULL_RTX);
8871 insn = gen_lowpart (SImode, insn);
8872 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8873 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8874 insn = gen_rtx_AND (DImode, insn, mask);
8875 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8876 insn = gen_lowpart (SImode, insn);
8877 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8878 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8879 insn = gen_rtx_AND (DImode, insn, mask);
8880 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8881 insn = gen_lowpart (SImode, insn);
8882 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8883 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8884 insn = gen_rtx_AND (DImode, insn, mask);
8885 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8886 insn = gen_lowpart (SImode, insn);
8887 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8889 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8890 insn = gen_rtx_AND (DImode, insn, mask);
8891 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8892 insn = gen_lowpart (SImode, insn);
8893 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8895 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8896 GEN_INT (0x6bf10600));
8897 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8898 GEN_INT (0x4415fc10));
8899 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8900 GEN_INT (0x4401fff0));
8901 emit_insn (gen_ic_invalidate_line (tramp));
8904 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8905 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8907 tramp_templ = gen_datalabel_ref (tramp_templ);
8908 dst = gen_rtx_MEM (BLKmode, tramp);
8909 src = gen_rtx_MEM (BLKmode, tramp_templ);
8910 set_mem_align (dst, 256);
8911 set_mem_align (src, 64);
8912 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8914 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8916 emit_move_insn (gen_rtx_MEM (Pmode,
8917 plus_constant (tramp,
8919 + GET_MODE_SIZE (Pmode))),
8921 emit_insn (gen_ic_invalidate_line (tramp));
8924 else if (TARGET_SHMEDIA)
8926 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8927 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8928 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8929 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8930 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8931 rotated 10 right, and higher 16 bit of every 32 selected. */
8933 = force_reg (V2HImode, (simplify_gen_subreg
8934 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8935 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8936 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8938 tramp = force_reg (Pmode, tramp);
8939 fnaddr = force_reg (SImode, fnaddr);
8940 cxt = force_reg (SImode, cxt);
8941 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8942 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8944 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8945 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8946 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8947 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8948 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8949 gen_rtx_SUBREG (V2HImode, cxt, 0),
8951 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8952 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8953 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8954 if (TARGET_LITTLE_ENDIAN)
8956 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8957 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8961 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8962 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8964 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8965 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8966 emit_insn (gen_ic_invalidate_line (tramp));
8969 else if (TARGET_SHCOMPACT)
8971 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8974 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8975 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8977 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8978 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8980 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8982 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8986 if (TARGET_USERMODE)
8987 emit_library_call (function_symbol ("__ic_invalidate"),
8988 0, VOIDmode, 1, tramp, SImode);
8990 emit_insn (gen_ic_invalidate_line (tramp));
8994 /* FIXME: This is overly conservative. A SHcompact function that
8995 receives arguments ``by reference'' will have them stored in its
8996 own stack frame, so it must not pass pointers or references to
8997 these arguments to other functions by means of sibling calls. */
/* Target hook: decide whether a sibling (tail) call is allowed from the
   current function.  DECL is the callee's declaration; EXP is unused.
   Sibcalls are rejected for interrupt handlers and, on SHcompact, for
   functions that take any arguments in stack registers (see FIXME above).  */
8999 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
/* On SHcompact, only safe when no incoming arguments live on the stack.  */
9002 && (! TARGET_SHCOMPACT
9003 || current_function_args_info.stack_regs == 0)
/* Interrupt handlers use a special return sequence; never sibcall.  */
9004 && ! sh_cfun_interrupt_handler_p ());
9007 /* Machine specific built-in functions. */
/* Description of one machine-specific builtin: the insn it expands to,
   its user-visible name, and an index into signature_args below.  */
9009 struct builtin_description
9011 const enum insn_code icode;
9012 const char *const name;
9016 /* describe number and signedness of arguments; arg[0] == result
9017 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
/* Each row encodes { result, arg1, arg2, arg3 } with the codes above.
   The SH_BLTIN_* macros name the row indices.  Rows below
   SH_BLTIN_NUM_SHARED_SIGNATURES have one cached function type that all
   builtins using that row share (see sh_media_init_builtins).  */
9018 static const char signature_args[][4] =
9020 #define SH_BLTIN_V2SI2 0
9022 #define SH_BLTIN_V4HI2 1
9024 #define SH_BLTIN_V2SI3 2
9026 #define SH_BLTIN_V4HI3 3
9028 #define SH_BLTIN_V8QI3 4
9030 #define SH_BLTIN_MAC_HISI 5
9032 #define SH_BLTIN_SH_HI 6
9034 #define SH_BLTIN_SH_SI 7
9036 #define SH_BLTIN_V4HI2V2SI 8
9038 #define SH_BLTIN_V4HI2V8QI 9
9040 #define SH_BLTIN_SISF 10
9042 #define SH_BLTIN_LDUA_L 11
9044 #define SH_BLTIN_LDUA_Q 12
9046 #define SH_BLTIN_STUA_L 13
9048 #define SH_BLTIN_STUA_Q 14
9050 #define SH_BLTIN_UDI 15
9052 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
/* Indices from here on are not cached/shared; some aliases map
   different mnemonic names onto the same row.  */
9053 #define SH_BLTIN_2 16
9054 #define SH_BLTIN_SU 16
9056 #define SH_BLTIN_3 17
9057 #define SH_BLTIN_SUS 17
9059 #define SH_BLTIN_PSSV 18
9061 #define SH_BLTIN_XXUU 19
9062 #define SH_BLTIN_UUUU 19
9064 #define SH_BLTIN_PV 20
9067 /* mcmv: operands considered unsigned. */
9068 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9069 /* mperm: control value considered unsigned int. */
9070 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9071 /* mshards_q: returns signed short. */
9072 /* nsb: takes long long arg, returns unsigned char. */
/* Table of SHmedia builtins: insn code, user-visible name, and
   signature_args row index.  A builtin's DECL_FUNCTION_CODE is its
   index into this table (see sh_media_init_builtins /
   sh_expand_builtin).  Entries that appear twice under the same name
   (e.g. ALLOCO, LDHI/LDLO/STHI/STLO, PREFO) are the 32-bit and 64-bit
   pointer-mode variants; only the one matching Pmode is registered.  */
9073 static const struct builtin_description bdesc[] =
9075 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9076 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9077 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9078 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9079 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9080 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9081 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9083 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9084 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9086 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9087 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9088 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9089 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9090 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9091 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9092 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9093 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9094 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9095 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9096 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9097 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9098 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9099 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9100 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9101 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9102 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9103 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9104 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9105 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9106 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9107 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9108 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9109 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9110 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9111 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9112 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9113 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9114 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9115 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9116 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9117 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9118 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9119 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9120 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9121 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9122 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9123 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9124 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9125 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9126 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9127 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9128 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9129 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9130 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9131 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9132 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9133 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9134 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
/* Floating-point SHmedia builtins.  */
9135 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9136 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9137 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9138 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9139 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
/* Misaligned load/store builtins; *_64 rows are the 64-bit Pmode
   variants of the same names.  */
9141 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9142 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9143 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9144 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9145 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9146 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9147 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9148 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9149 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9150 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9151 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9152 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9153 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9154 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9155 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9156 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9158 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9159 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9161 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9162 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
/* Register every SHmedia builtin in bdesc, building a tree function
   type for each from its signature_args row and the insn operand
   modes.  Types for rows below SH_BLTIN_NUM_SHARED_SIGNATURES are
   constructed once and cached in SHARED.  */
9167 sh_media_init_builtins (void)
9169 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9170 const struct builtin_description *d;
9172 memset (shared, 0, sizeof shared);
9173 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9175 tree type, arg_type;
9176 int signature = d->signature;
/* Reuse a previously built function type for a shared signature.  */
9179 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9180 type = shared[signature];
/* Nonzero when the builtin produces a result (arg[0] != 0); shifts
   the mapping from argument slot to insn operand number.  */
9183 int has_result = signature_args[signature][0] != 0;
/* Pointer-taking builtins appear in 32- and 64-bit variants under one
   name; skip the variant whose operand mode is not Pmode.  */
9185 if (signature_args[signature][1] == 8
9186 && (insn_data[d->icode].operand[has_result].mode != Pmode))
/* Without any FPU, skip builtins with a floating-point operand 0.  */
9188 if (! TARGET_FPU_ANY
9189 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9191 type = void_list_node;
9194 int arg = signature_args[signature][i];
9195 int opno = i - 1 + has_result;
9198 arg_type = ptr_type_node;
/* Derive the argument's tree type from the insn operand's mode.  */
9200 arg_type = ((*lang_hooks.types.type_for_mode)
9201 (insn_data[d->icode].operand[opno].mode,
9206 arg_type = void_type_node;
9209 type = tree_cons (NULL_TREE, arg_type, type);
9211 type = build_function_type (arg_type, type);
9212 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9213 shared[signature] = type;
/* The builtin's function code is its index into bdesc.  */
9215 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
/* Target hook: set up all machine-specific builtin functions.
   Delegates to sh_media_init_builtins (presumably guarded by a
   TARGET_SHMEDIA check on an elided line -- TODO confirm).  */
9221 sh_init_builtins (void)
9224 sh_media_init_builtins ();
9227 /* Expand an expression EXP that calls a built-in function,
9228 with result going to TARGET if that's convenient
9229 (and in mode MODE if that's convenient).
9230 SUBTARGET may be used as the target for computing one of EXP's operands.
9231 IGNORE is nonzero if the value is to be ignored. */
9234 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9235 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
/* Look up the bdesc entry via the decl's function code (its index).  */
9237 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9238 tree arglist = TREE_OPERAND (exp, 1);
9239 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9240 const struct builtin_description *d = &bdesc[fcode];
9241 enum insn_code icode = d->icode;
9242 int signature = d->signature;
9243 enum machine_mode tmode = VOIDmode;
/* If the builtin has a result, ensure TARGET is a register of the
   insn's output mode that satisfies operand 0's predicate.  */
9248 if (signature_args[signature][0])
9253 tmode = insn_data[icode].operand[0].mode;
9255 || GET_MODE (target) != tmode
9256 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9257 target = gen_reg_rtx (tmode);
/* Expand up to three arguments into insn operands.  */
9263 for (i = 1; i <= 3; i++, nop++)
9266 enum machine_mode opmode, argmode;
/* A zero signature code marks the end of the argument list.  */
9268 if (! signature_args[signature][i])
9270 arg = TREE_VALUE (arglist);
9271 if (arg == error_mark_node)
9273 arglist = TREE_CHAIN (arglist);
9274 opmode = insn_data[icode].operand[nop].mode;
9275 argmode = TYPE_MODE (TREE_TYPE (arg));
/* Coerce the argument to the operand's mode if they differ.  */
9276 if (argmode != opmode)
9277 arg = build1 (NOP_EXPR,
9278 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9279 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
/* Force the operand into a register if the predicate rejects it.  */
9280 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9281 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* Generate the insn pattern with the right operand count.  */
9287 pat = (*insn_data[d->icode].genfun) (op[0]);
9290 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9293 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9296 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Expand a unary V2SF operation CODE into two scalar SFmode
   operations, one per vector lane: OP0 := CODE (OP1).  */
9308 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9310 rtx sel0 = const0_rtx;
9311 rtx sel1 = const1_rtx;
9312 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9313 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
/* Apply the scalar operation to lane 0, then lane 1.  */
9315 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9316 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Expand a binary V2SF operation CODE into two scalar SFmode
   operations, one per vector lane: OP0 := OP1 CODE OP2.  */
9320 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9322 rtx sel0 = const0_rtx;
9323 rtx sel1 = const1_rtx;
9324 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9326 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
/* Lane 0 then lane 1; the trailing selectors differ per the
   gen_binary_sf_op pattern's operand layout.  */
9328 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9329 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9332 /* Return the class of registers for which a mode change from FROM to TO
/* Size-changing subregs are problematic in double-precision FP
   registers; which register class is affected depends on endianness.  */
9335 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9336 enum reg_class class)
/* Equal-size mode changes are always fine; only check unequal sizes.  */
9338 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9340 if (TARGET_LITTLE_ENDIAN)
/* Little-endian: any sub-8-byte access into a DF register is unsafe.  */
9342 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9343 return reg_classes_intersect_p (DF_REGS, class);
/* Big-endian: only narrowing FROM below 8 bytes is a problem, and only
   for the high parts of double registers.  */
9347 if (GET_MODE_SIZE (from) < 8)
9348 return reg_classes_intersect_p (DF_HI_REGS, class);
9355 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9356 that label is used. */
9359 sh_mark_label (rtx address, int nuses)
/* GOTOFF addresses wrap the label in an UNSPEC (possibly inside a
   PLUS); peel those layers off first.  */
9361 if (GOTOFF_P (address))
9363 /* Extract the label or symbol. */
9364 address = XEXP (address, 0);
9365 if (GET_CODE (address) == PLUS)
9366 address = XEXP (address, 0);
9367 address = XVECEXP (address, 0, 0);
/* Only bump the use count for genuine code labels.  */
9369 if (GET_CODE (address) == LABEL_REF
9370 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9371 LABEL_NUSES (XEXP (address, 0)) += nuses;
9374 /* Compute extra cost of moving data between one register class
9377 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9378 uses this information. Hence, the general register <-> floating point
9379 register information here is not used for SFmode. */
/* Implements REGISTER_MOVE_COST: relative cost of moving a value of
   MODE from SRCCLASS to DSTCLASS.  Checks run from most to least
   specific register-class pairing; wider modes scale some costs by the
   number of registers needed.  */
9382 sh_register_move_cost (enum machine_mode mode,
9383 enum reg_class srcclass, enum reg_class dstclass)
9385 if (dstclass == T_REGS || dstclass == PR_REGS)
9388 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
/* SImode into FP regs with FMOVD needs an extra shuffle step.  */
9391 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9392 && REGCLASS_HAS_FP_REG (srcclass)
9393 && REGCLASS_HAS_FP_REG (dstclass)
9396 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9397 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
/* FP <-> general register moves: cost scales with the number of
   8-byte chunks; cheaper with SHmedia or FMOVD.  */
9400 if ((REGCLASS_HAS_FP_REG (dstclass)
9401 && REGCLASS_HAS_GENERAL_REG (srcclass))
9402 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9403 && REGCLASS_HAS_FP_REG (srcclass)))
9404 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9405 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9407 if ((dstclass == FPUL_REGS
9408 && REGCLASS_HAS_GENERAL_REG (srcclass))
9409 || (srcclass == FPUL_REGS
9410 && REGCLASS_HAS_GENERAL_REG (dstclass)))
/* FPUL <-> PR/MAC/T moves must bounce through another register.  */
9413 if ((dstclass == FPUL_REGS
9414 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9415 || (srcclass == FPUL_REGS
9416 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9419 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9420 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9423 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9424 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9429 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9430 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9431 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Default: scale by the number of 4-byte registers moved.  */
9433 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9436 /* Like register_operand, but take into account that SHMEDIA can use
9437 the constant zero like a general register. */
9439 sh_register_operand (rtx op, enum machine_mode mode)
/* On SHmedia, constant zero reads as register r63.  */
9441 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9443 return register_operand (op, mode);
/* Predicate for cmpsi operands: accepts the T register in SImode,
   otherwise defers to arith_operand.  */
9447 cmpsi_operand (rtx op, enum machine_mode mode)
9449 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9450 && GET_MODE (op) == SImode)
9452 return arith_operand (op, mode);
9455 static rtx emit_load_ptr (rtx, rtx);
/* Emit a move that loads a pointer-sized value from memory at ADDR
   into REG, sign-extending from ptr_mode to Pmode when they differ
   (i.e. SHmedia64 with 32-bit pointers).  Returns the emitted insn.  */
9458 emit_load_ptr (rtx reg, rtx addr)
9460 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9462 if (Pmode != ptr_mode)
9463 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9464 return emit_move_insn (reg, mem);
/* Target hook TARGET_ASM_OUTPUT_MI_THUNK: emit the assembly for a
   C++ `this'-adjusting thunk to FILE.  The thunk adds DELTA to the
   incoming `this', optionally adds the value loaded from
   *(*(this) + VCALL_OFFSET) for virtual bases, then tail-calls
   FUNCTION.  RTL is generated and run through a minimal
   rest_of_compilation by hand (scheduling, delay slots, final).  */
9468 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9469 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9472 CUMULATIVE_ARGS cum;
9473 int structure_value_byref = 0;
9474 rtx this, this_value, sibcall, insns, funexp;
9475 tree funtype = TREE_TYPE (function);
/* Nonzero when DELTA fits an add-immediate on this subtarget.  */
9476 int simple_add = CONST_OK_FOR_ADD (delta);
9478 rtx scratch0, scratch1, scratch2;
/* Pretend register allocation is done so RTL passes behave.  */
9480 reload_completed = 1;
9481 epilogue_completed = 1;
9483 current_function_uses_only_leaf_regs = 1;
9484 reset_block_changes ();
9486 emit_note (NOTE_INSN_PROLOGUE_END);
9488 /* Find the "this" pointer. We have such a wide range of ABIs for the
9489 SH that it's best to do this completely machine independently.
9490 "this" is passed as first argument, unless a structure return pointer
9491 comes first, in which case "this" comes second. */
9492 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9493 #ifndef PCC_STATIC_STRUCT_RETURN
9494 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9495 structure_value_byref = 1;
9496 #endif /* not PCC_STATIC_STRUCT_RETURN */
9497 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9499 tree ptype = build_pointer_type (TREE_TYPE (funtype));
/* Skip past the struct-return pointer argument.  */
9501 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9503 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9505 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9506 static chain pointer (even if you can't have nested virtual functions
9507 right now, someone might implement them sometime), and the rest of the
9508 registers are used for argument passing, are callee-saved, or reserved. */
9509 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9512 scratch1 = gen_rtx_REG (ptr_mode, 1);
9513 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9514 pointing where to return struct values. */
9515 scratch2 = gen_rtx_REG (Pmode, 3);
9517 else if (TARGET_SHMEDIA)
9519 scratch1 = gen_rtx_REG (ptr_mode, 21);
9520 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
/* Adjust `this' by DELTA.  Fold the add into the vcall load when the
   resulting address is directly usable.  */
9523 this_value = plus_constant (this, delta);
9525 && (simple_add || scratch0 != scratch1)
9526 && strict_memory_address_p (ptr_mode, this_value))
9528 emit_load_ptr (scratch0, this_value);
9534 else if (simple_add)
9535 emit_move_insn (this, this_value);
/* DELTA too large for add-immediate: materialize it in a scratch.  */
9538 emit_move_insn (scratch1, GEN_INT (delta));
9539 emit_insn (gen_add2_insn (this, scratch1));
/* Virtual-base case: load the vtable pointer, then the offset stored
   at VCALL_OFFSET within it, and add that to `this' too.  */
9547 emit_load_ptr (scratch0, this);
9549 offset_addr = plus_constant (scratch0, vcall_offset);
9550 if (strict_memory_address_p (ptr_mode, offset_addr))
9552 else if (! TARGET_SH5)
9554 /* scratch0 != scratch1, and we have indexed loads. Get better
9555 schedule by loading the offset into r1 and using an indexed
9556 load - then the load of r1 can issue before the load from
9557 (this + delta) finishes. */
9558 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9559 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9561 else if (CONST_OK_FOR_ADD (vcall_offset))
9563 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9564 offset_addr = scratch0;
9566 else if (scratch0 != scratch1)
9568 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9569 emit_insn (gen_add2_insn (scratch0, scratch1));
9570 offset_addr = scratch0;
/* No way to form the address with the registers available.  */
9573 abort (); /* FIXME */
9574 emit_load_ptr (scratch0, offset_addr);
/* Narrow the loaded offset back to ptr_mode before the final add.  */
9576 if (Pmode != ptr_mode)
9577 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9578 emit_insn (gen_add2_insn (this, scratch0));
9581 /* Generate a tail call to the target function. */
9582 if (! TREE_USED (function))
9584 assemble_external (function);
9585 TREE_USED (function) = 1;
9587 funexp = XEXP (DECL_RTL (function), 0);
/* Call through scratch2 so the address is in a register the sibcall
   pattern can use.  */
9588 emit_move_insn (scratch2, funexp);
9589 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9590 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9591 SIBLING_CALL_P (sibcall) = 1;
9592 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9595 /* Run just enough of rest_of_compilation to do scheduling and get
9596 the insns emitted. Note that use_thunk calls
9597 assemble_start_function and assemble_end_function. */
9599 insn_locators_initialize ();
9600 insns = get_insns ();
9602 if (optimize > 0 && flag_schedule_insns_after_reload)
9604 find_basic_blocks (insns, max_reg_num (), dump_file);
9605 life_analysis (dump_file, PROP_FINAL);
9607 split_all_insns (1);
9609 schedule_insns (dump_file);
9614 if (optimize > 0 && flag_delayed_branch)
9615 dbr_schedule (insns, dump_file);
9616 shorten_branches (insns);
9617 final_start_function (insns, file, 1);
9618 final (insns, file, 1, 0);
9619 final_end_function ();
9621 if (optimize > 0 && flag_schedule_insns_after_reload)
9623 /* Release all memory allocated by flow. */
9624 free_basic_block_vars ();
9626 /* Release all memory held by regsets now. */
9627 regset_release_memory ();
/* Undo the fake post-reload state set up at entry.  */
9630 reload_completed = 0;
9631 epilogue_completed = 0;
/* Return a SYMBOL_REF for the function named NAME, with its
   function flag set so it is treated as a function address.  */
9636 function_symbol (const char *name)
9638 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9639 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9643 /* Find the number of a general purpose register in S. */
9645 scavenge_reg (HARD_REG_SET *s)
/* Scan the general registers and report the first one present in S.  */
9648 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9649 if (TEST_HARD_REG_BIT (*s, r))
/* Return an rtx holding the value PR (the return-address register)
   had on entry to the current function.  */
9655 sh_get_pr_initial_val (void)
9659 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9660 PR register on SHcompact, because it might be clobbered by the prologue.
9661 We check first if that is known to be the case. */
9662 if (TARGET_SHCOMPACT
9663 && ((current_function_args_info.call_cookie
9664 & ~ CALL_COOKIE_RET_TRAMP (1))
9665 || current_function_has_nonlocal_label))
/* PR was saved to the stack; read it back through the RA pointer.  */
9666 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9668 /* If we haven't finished rtl generation, there might be a nonlocal label
9669 that we haven't seen yet.
9670 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9671 is set, unless it has been called before for the same register. And even
9672 then, we end in trouble if we didn't use the register in the same
9673 basic block before. So call get_hard_reg_initial_val now and wrap it
9674 in an unspec if we might need to replace it. */
9675 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9676 combine can put the pseudo returned by get_hard_reg_initial_val into
9677 instructions that need a general purpose registers, which will fail to
9678 be recognized when the pseudo becomes allocated to PR. */
9680 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
/* Wrap in UNSPEC_RA so it can be replaced later (see comments above).  */
9682 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
/* Try to expand an scc-style comparison (CODE of sh_compare_op0 and
   sh_compare_op1) into TARGET when the first operand is the T register
   compared against a constant.  Special sequences handle comparing T
   against 0 or 1 directly.  */
9687 sh_expand_t_scc (enum rtx_code code, rtx target)
9689 rtx result = target;
/* Only handle T-register vs. constant-integer comparisons here.  */
9692 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9693 || GET_CODE (sh_compare_op1) != CONST_INT)
9695 if (GET_CODE (result) != REG)
9696 result = gen_reg_rtx (SImode);
9697 val = INTVAL (sh_compare_op1);
/* (T == 1) or (T != 0): the answer is T itself; copy it with movt.  */
9698 if ((code == EQ && val == 1) || (code == NE && val == 0))
9699 emit_insn (gen_movt (result));
/* (T == 0) or (T != 1): compute 1 - T via subc of a clobbered reg.  */
9700 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9702 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9703 emit_insn (gen_subc (result, result, result));
9704 emit_insn (gen_addsi3 (result, result, const1_rtx));
/* Comparing T against any other constant gives a known answer.  */
9706 else if (code == EQ || code == NE)
9707 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9710 if (result != target)
9711 emit_move_insn (target, result);
9715 /* INSN is an sfunc; return the rtx that describes the address used. */
9717 extract_sfunc_addr (rtx insn)
9719 rtx pattern, part = NULL_RTX;
9722 pattern = PATTERN (insn);
9723 len = XVECLEN (pattern, 0);
/* Look through the PARALLEL for a USE of a Pmode general register:
   that register holds the sfunc's call address.  */
9724 for (i = 0; i < len; i++)
9726 part = XVECEXP (pattern, 0, i);
9727 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9728 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9729 return XEXP (part, 0);
/* Otherwise the address is operand 1 of the leading unspec_volatile.  */
9731 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9732 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9736 /* Verify that the register in use_sfunc_addr still agrees with the address
9737 used in the sfunc. This prevents fill_slots_from_thread from changing
9739 INSN is the use_sfunc_addr instruction, and REG is the register it
9742 check_use_sfunc_addr (rtx insn, rtx reg)
9744 /* Search for the sfunc. It should really come right after INSN. */
9745 while ((insn = NEXT_INSN (insn)))
9747 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9749 if (! INSN_P (insn))
9752 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9753 insn = XVECEXP (PATTERN (insn), 0, 0);
9754 if (GET_CODE (PATTERN (insn)) != PARALLEL
9755 || get_attr_type (insn) != TYPE_SFUNC)
9757 return rtx_equal_p (extract_sfunc_addr (insn), reg);