1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option) any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
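/* Worked example: for a DImode value held in a register pair, say
   (r4,r5), word 0 is the least significant word on a little-endian
   target, so LSW == 0 picks r4 and MSW == 1 picks r5; on a big-endian
   target the two macros swap roles.  */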
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
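/* Purely by substitution, GEN_ADD3 (dst, a, b) emits
   gen_adddi3 (dst, a, b) on SHmedia64 and gen_addsi3 (dst, a, b)
   everywhere else; GEN_MOV and GEN_SUB3 behave analogously.  */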
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which CPU we are scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc or bcc insn. */
103 /* Provides the class number of the smallest class containing reg number. */
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target-independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (int, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented for re-enabling sched1. With the
321 help of these macros we limit the movement of insns in sched1 to
322 reduce register pressure. The overall idea is to keep count of the SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross some threshold values, we give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order), but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip
328 up to a maximum of 8 cycles so that such insns may move from Q -> R.
330 The descriptions of the hooks are as follows:
332 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
333 scheduler; it is called inside the sched_init function just after
334 the find_insn_reg_weights function call. It is used to calculate the SImode
335 and SFmode weights of the insns of basic blocks, much like what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from the stalled queue (Q) to the ready queue (R).
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high, reorder the ready queue so that the insn with the lowest LUID will be issued first.
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
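/* A rough sketch of the interplay, in terms of the statics above:
   sh_reorder2 detects high pressure and sets skip_cycles, and
   sh_dfa_new_cycle then lets the scheduler skip cycles so that a
   register-freeing insn stalled in Q gets a chance to reach R.  */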
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
527 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
531 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
538 case OPT_m4_single_only:
539 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
543 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
548 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
552 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
555 case OPT_m4a_single_only:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
563 case OPT_m5_32media_nofpu:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
571 case OPT_m5_64media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
579 case OPT_m5_compact_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
588 /* Print the operand address in x to the stream. */
591 print_operand_address (FILE *stream, rtx x)
593 switch (GET_CODE (x))
597 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
602 rtx base = XEXP (x, 0);
603 rtx index = XEXP (x, 1);
605 switch (GET_CODE (index))
608 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
609 reg_names[true_regnum (base)]);
615 int base_num = true_regnum (base);
616 int index_num = true_regnum (index);
618 fprintf (stream, "@(r0,%s)",
619 reg_names[MAX (base_num, index_num)]);
630 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
634 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
638 x = mark_constant_pool_use (x);
639 output_addr_const (stream, x);
644 /* Print operand x (an rtx) in assembler syntax to file stream
645 according to modifier code.
647 '.' print a .s if insn needs delay slot
648 ',' print LOCAL_LABEL_PREFIX
649 '@' print trap, rte or rts depending upon pragma interruptness
650 '#' output a nop if there is nothing to put in the delay slot
651 ''' print likelihood suffix (/u for unlikely).
652 '>' print branch target if -fverbose-asm
653 'O' print a constant without the #
654 'R' print the LSW of a dp value - changes if in little endian
655 'S' print the MSW of a dp value - changes if in little endian
656 'T' print the next word of a dp value - same as 'R' in big endian mode.
657 'M' print an `x' if `m' will print `base,index'.
658 'N' print 'r63' if the operand is (const_int 0).
659 'd' print a V2SF reg as dN instead of fpN.
660 'm' print a pair `base,offset' or `base,index', for LD and ST.
661 'U' Likewise for {LD,ST}{HI,LO}.
662 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
663 'o' output an operator. */
666 print_operand (FILE *stream, rtx x, int code)
669 enum machine_mode mode;
677 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
678 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
679 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
682 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
685 trapa_attr = lookup_attribute ("trap_exit",
686 DECL_ATTRIBUTES (current_function_decl));
688 fprintf (stream, "trapa #%ld",
689 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
690 else if (sh_cfun_interrupt_handler_p ())
691 fprintf (stream, "rte");
693 fprintf (stream, "rts");
696 /* Output a nop if there's nothing in the delay slot. */
697 if (dbr_sequence_length () == 0)
698 fprintf (stream, "\n\tnop");
702 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
704 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
705 fputs ("/u", stream);
709 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
711 fputs ("\t! target: ", stream);
712 output_addr_const (stream, JUMP_LABEL (current_output_insn));
716 x = mark_constant_pool_use (x);
717 output_addr_const (stream, x);
719 /* N.B.: %R / %S / %T adjust memory addresses by four.
720 For SHMEDIA, that means they can be used to access the first and
721 second 32 bit part of a 64 bit (or larger) value that
722 might be held in floating point registers or memory.
723 While they can be used to access 64 bit parts of a larger value
724 held in general purpose registers, that won't work with memory -
725 neither for fp registers, since the frxx names are used. */
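/* Illustrative example for the code below: with a little-endian DImode
   value in (r4,r5), %R prints r4 (the LSW) and %S prints r5 (the MSW);
   for a value in the FP pair (fr4,fr5), %R prints fr5 and %S prints
   fr4, since FP register pairs are always big endian.  */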
727 if (REG_P (x) || GET_CODE (x) == SUBREG)
729 regno = true_regnum (x);
730 regno += FP_REGISTER_P (regno) ? 1 : LSW;
731 fputs (reg_names[regno], (stream));
735 x = adjust_address (x, SImode, 4 * LSW);
736 print_operand_address (stream, XEXP (x, 0));
743 if (mode == VOIDmode)
745 if (GET_MODE_SIZE (mode) >= 8)
746 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
748 print_operand (stream, sub, 0);
750 output_operand_lossage ("invalid operand to %%R");
754 if (REG_P (x) || GET_CODE (x) == SUBREG)
756 regno = true_regnum (x);
757 regno += FP_REGISTER_P (regno) ? 0 : MSW;
758 fputs (reg_names[regno], (stream));
762 x = adjust_address (x, SImode, 4 * MSW);
763 print_operand_address (stream, XEXP (x, 0));
770 if (mode == VOIDmode)
772 if (GET_MODE_SIZE (mode) >= 8)
773 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
775 print_operand (stream, sub, 0);
777 output_operand_lossage ("invalid operand to %%S");
781 /* Next word of a double. */
782 switch (GET_CODE (x))
785 fputs (reg_names[REGNO (x) + 1], (stream));
788 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
789 && GET_CODE (XEXP (x, 0)) != POST_INC)
790 x = adjust_address (x, SImode, 4);
791 print_operand_address (stream, XEXP (x, 0));
798 switch (GET_CODE (x))
800 case PLUS: fputs ("add", stream); break;
801 case MINUS: fputs ("sub", stream); break;
802 case MULT: fputs ("mul", stream); break;
803 case DIV: fputs ("div", stream); break;
804 case EQ: fputs ("eq", stream); break;
805 case NE: fputs ("ne", stream); break;
806 case GT: case LT: fputs ("gt", stream); break;
807 case GE: case LE: fputs ("ge", stream); break;
808 case GTU: case LTU: fputs ("gtu", stream); break;
809 case GEU: case LEU: fputs ("geu", stream); break;
815 if (GET_CODE (x) == MEM
816 && GET_CODE (XEXP (x, 0)) == PLUS
817 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
818 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
823 gcc_assert (GET_CODE (x) == MEM);
827 switch (GET_CODE (x))
831 print_operand (stream, x, 0);
832 fputs (", 0", stream);
836 print_operand (stream, XEXP (x, 0), 0);
837 fputs (", ", stream);
838 print_operand (stream, XEXP (x, 1), 0);
847 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
849 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
853 if (x == CONST0_RTX (GET_MODE (x)))
855 fprintf ((stream), "r63");
860 if (GET_CODE (x) == CONST_INT)
862 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
872 switch (GET_CODE (x))
876 rtx inner = XEXP (x, 0);
878 enum machine_mode inner_mode;
880 /* We might see SUBREGs with vector mode registers inside. */
881 if (GET_CODE (inner) == SUBREG
882 && (GET_MODE_SIZE (GET_MODE (inner))
883 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
884 && subreg_lowpart_p (inner))
885 inner = SUBREG_REG (inner);
886 if (GET_CODE (inner) == CONST_INT)
888 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
891 inner_mode = GET_MODE (inner);
892 if (GET_CODE (inner) == SUBREG
893 && (GET_MODE_SIZE (GET_MODE (inner))
894 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
895 && GET_CODE (SUBREG_REG (inner)) == REG)
897 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
898 GET_MODE (SUBREG_REG (inner)),
901 inner = SUBREG_REG (inner);
903 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
905 /* Floating point register pairs are always big endian;
906 general purpose registers are 64 bit wide. */
907 regno = REGNO (inner);
908 regno = (HARD_REGNO_NREGS (regno, inner_mode)
909 - HARD_REGNO_NREGS (regno, mode))
917 /* FIXME: We need this on SHmedia32 because reload generates
918 some sign-extended HI or QI loads into DImode registers
919 but, because Pmode is SImode, the address ends up with a
920 subreg:SI of the DImode register. Maybe reload should be
921 fixed so as to apply alter_subreg to such loads? */
923 gcc_assert (trapping_target_operand (x, VOIDmode));
924 x = XEXP (XEXP (x, 2), 0);
927 gcc_assert (SUBREG_BYTE (x) == 0
928 && GET_CODE (SUBREG_REG (x)) == REG);
936 if (FP_REGISTER_P (regno)
937 && mode == V16SFmode)
938 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
939 else if (FP_REGISTER_P (REGNO (x))
941 fprintf ((stream), "fv%s", reg_names[regno] + 2);
942 else if (GET_CODE (x) == REG
944 fprintf ((stream), "fp%s", reg_names[regno] + 2);
945 else if (FP_REGISTER_P (REGNO (x))
946 && GET_MODE_SIZE (mode) > 4)
947 fprintf ((stream), "d%s", reg_names[regno] + 1);
949 fputs (reg_names[regno], (stream));
953 output_address (XEXP (x, 0));
958 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
959 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
960 && (GET_MODE (XEXP (x, 0)) == DImode
961 || GET_MODE (XEXP (x, 0)) == SImode)
962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
965 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
967 bool nested_expr = false;
970 if (GET_CODE (val) == ASHIFTRT)
973 val2 = XEXP (val, 0);
975 if (GET_CODE (val2) == CONST
976 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
981 output_addr_const (stream, val2);
984 if (GET_CODE (val) == ASHIFTRT)
986 fputs (" >> ", stream);
987 output_addr_const (stream, XEXP (val, 1));
990 fputs (" & 65535)", stream);
998 output_addr_const (stream, x);
1005 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1007 force_into (rtx value, rtx target)
1009 value = force_operand (value, target);
1010 if (! rtx_equal_p (value, target))
1011 emit_insn (gen_move_insn (target, value));
1014 /* Emit code to perform a block move. Choose the best method.
1016 OPERANDS[0] is the destination.
1017 OPERANDS[1] is the source.
1018 OPERANDS[2] is the size.
1019 OPERANDS[3] is the alignment safe to use. */
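/* A sketch of the strategy selection below, as far as this function
   goes: SH4A may use movua.l loads to cope with unaligned sources;
   otherwise the size must be a constant multiple of 4 bytes with 4 byte
   alignment, and the work is delegated to the __movmemSI* / __movmem
   library entry points.  */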
1022 expand_block_move (rtx *operands)
1024 int align = INTVAL (operands[3]);
1025 int constp = (GET_CODE (operands[2]) == CONST_INT);
1026 int bytes = (constp ? INTVAL (operands[2]) : 0);
1031 /* If we could use mov.l to move words and dest is word-aligned, we
1032 can use movua.l for loads and still generate a relatively short
1033 and efficient sequence. */
1034 if (TARGET_SH4A_ARCH && align < 4
1035 && MEM_ALIGN (operands[0]) >= 32
1036 && can_move_by_pieces (bytes, 32))
1038 rtx dest = copy_rtx (operands[0]);
1039 rtx src = copy_rtx (operands[1]);
1040 /* We could use different pseudos for each copied word, but
1041 since movua can only load into r0, it's kind of pointless. */
1043 rtx temp = gen_reg_rtx (SImode);
1044 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1047 while (copied + 4 <= bytes)
1049 rtx to = adjust_address (dest, SImode, copied);
1050 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1052 emit_insn (gen_movua (temp, from));
1053 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1054 emit_move_insn (to, temp);
1059 move_by_pieces (adjust_address (dest, BLKmode, copied),
1060 adjust_automodify_address (src, BLKmode,
1062 bytes - copied, align, 0);
1067 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1068 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1069 if (align < 4 || (bytes % 4 != 0))
1072 if (TARGET_HARD_SH4)
1076 else if (bytes == 12)
1078 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1079 rtx r4 = gen_rtx_REG (SImode, 4);
1080 rtx r5 = gen_rtx_REG (SImode, 5);
1082 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1083 force_into (XEXP (operands[0], 0), r4);
1084 force_into (XEXP (operands[1], 0), r5);
1085 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1088 else if (! TARGET_SMALLCODE)
1090 const char *entry_name;
1091 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1093 rtx r4 = gen_rtx_REG (SImode, 4);
1094 rtx r5 = gen_rtx_REG (SImode, 5);
1095 rtx r6 = gen_rtx_REG (SImode, 6);
1097 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1098 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1099 force_into (XEXP (operands[0], 0), r4);
1100 force_into (XEXP (operands[1], 0), r5);
1102 dwords = bytes >> 3;
1103 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1104 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1113 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1114 rtx r4 = gen_rtx_REG (SImode, 4);
1115 rtx r5 = gen_rtx_REG (SImode, 5);
1117 sprintf (entry, "__movmemSI%d", bytes);
1118 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1119 force_into (XEXP (operands[0], 0), r4);
1120 force_into (XEXP (operands[1], 0), r5);
1121 emit_insn (gen_block_move_real (func_addr_rtx));
1125 /* This is the same number of bytes as a memcpy call, but to a different,
1126 less common function name, so this will occasionally use more space. */
1127 if (! TARGET_SMALLCODE)
1129 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1130 int final_switch, while_loop;
1131 rtx r4 = gen_rtx_REG (SImode, 4);
1132 rtx r5 = gen_rtx_REG (SImode, 5);
1133 rtx r6 = gen_rtx_REG (SImode, 6);
1135 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1136 force_into (XEXP (operands[0], 0), r4);
1137 force_into (XEXP (operands[1], 0), r5);
1139 /* r6 controls the size of the move. 16 is decremented from it
1140 for each 64 bytes moved. Then the negative bit left over is used
1141 as an index into a list of move instructions. e.g., a 72 byte move
1142 would be set up with size(r6) = 14, for one iteration through the
1143 big while loop, and a switch of -2 for the last part. */
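/* Working the 72 byte example through the formulas below: 72 bytes is
   18 words, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14; one trip through
   the loop leaves 14 - 16 = -2, the switch index for the last 8 bytes.  */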
1145 final_switch = 16 - ((bytes / 4) % 16);
1146 while_loop = ((bytes / 4) / 16 - 1) * 16;
1147 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1148 emit_insn (gen_block_lump_real (func_addr_rtx));
1155 /* Prepare operands for a move define_expand; specifically, one of the
1156 operands must be in a register. */
1159 prepare_move_operands (rtx operands[], enum machine_mode mode)
1161 if ((mode == SImode || mode == DImode)
1163 && ! ((mode == Pmode || mode == ptr_mode)
1164 && tls_symbolic_operand (operands[1], Pmode) != 0))
1167 if (SYMBOLIC_CONST_P (operands[1]))
1169 if (GET_CODE (operands[0]) == MEM)
1170 operands[1] = force_reg (Pmode, operands[1]);
1171 else if (TARGET_SHMEDIA
1172 && GET_CODE (operands[1]) == LABEL_REF
1173 && target_reg_operand (operands[0], mode))
1177 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1178 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1181 else if (GET_CODE (operands[1]) == CONST
1182 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1183 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1185 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1186 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1188 operands[1] = expand_binop (mode, add_optab, temp,
1189 XEXP (XEXP (operands[1], 0), 1),
1190 no_new_pseudos ? temp
1191 : gen_reg_rtx (Pmode),
1192 0, OPTAB_LIB_WIDEN);
1196 if (! reload_in_progress && ! reload_completed)
1198 /* Copy the source to a register if both operands aren't registers. */
1199 if (! register_operand (operands[0], mode)
1200 && ! sh_register_operand (operands[1], mode))
1201 operands[1] = copy_to_mode_reg (mode, operands[1]);
1203 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1205 /* This is like change_address_1 (operands[0], mode, 0, 1),
1206 except that we can't use that function because it is static. */
1207 rtx new = change_address (operands[0], mode, 0);
1208 MEM_COPY_ATTRIBUTES (new, operands[0]);
1212 /* This case can happen while generating code to move the result
1213 of a library call to the target. Reject `st r0,@(rX,rY)' because
1214 reload will fail to find a spill register for rX, since r0 is already
1215 being used for the source. */
1217 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1218 && GET_CODE (operands[0]) == MEM
1219 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1220 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1221 operands[1] = copy_to_mode_reg (mode, operands[1]);
1224 if (mode == Pmode || mode == ptr_mode)
1227 enum tls_model tls_kind;
1231 if (GET_CODE (op1) == CONST
1232 && GET_CODE (XEXP (op1, 0)) == PLUS
1233 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1235 opc = XEXP (XEXP (op1, 0), 1);
1236 op1 = XEXP (XEXP (op1, 0), 0);
1241 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1243 rtx tga_op1, tga_ret, tmp, tmp2;
1247 case TLS_MODEL_GLOBAL_DYNAMIC:
1248 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1249 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1253 case TLS_MODEL_LOCAL_DYNAMIC:
1254 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1255 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1257 tmp = gen_reg_rtx (Pmode);
1258 emit_move_insn (tmp, tga_ret);
1260 if (register_operand (op0, Pmode))
1263 tmp2 = gen_reg_rtx (Pmode);
1265 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1269 case TLS_MODEL_INITIAL_EXEC:
1272 /* Don't schedule insns for getting GOT address when
1273 the first scheduling pass is enabled, to avoid spill failures for R0. */
1275 if (flag_schedule_insns)
1276 emit_insn (gen_blockage ());
1277 emit_insn (gen_GOTaddr2picreg ());
1278 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1280 if (flag_schedule_insns)
1281 emit_insn (gen_blockage ());
1283 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1284 tmp = gen_sym2GOTTPOFF (op1);
1285 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1289 case TLS_MODEL_LOCAL_EXEC:
1290 tmp2 = gen_reg_rtx (Pmode);
1291 emit_insn (gen_load_gbr (tmp2));
1292 tmp = gen_reg_rtx (Pmode);
1293 emit_insn (gen_symTPOFF2reg (tmp, op1));
1295 if (register_operand (op0, Pmode))
1298 op1 = gen_reg_rtx (Pmode);
1300 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1307 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1315 /* Prepare the operands for an scc instruction; make sure that the
1316 compare has been done. */
1318 prepare_scc_operands (enum rtx_code code)
1320 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1321 enum rtx_code oldcode = code;
1322 enum machine_mode mode;
1324 /* First need a compare insn. */
1328 /* It isn't possible to handle this case. */
1345 if (code != oldcode)
1347 rtx tmp = sh_compare_op0;
1348 sh_compare_op0 = sh_compare_op1;
1349 sh_compare_op1 = tmp;
1352 mode = GET_MODE (sh_compare_op0);
1353 if (mode == VOIDmode)
1354 mode = GET_MODE (sh_compare_op1);
1356 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1357 if ((code != EQ && code != NE
1358 && (sh_compare_op1 != const0_rtx
1359 || code == GTU || code == GEU || code == LTU || code == LEU))
1360 || (mode == DImode && sh_compare_op1 != const0_rtx)
1361 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1362 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1364 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1365 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1366 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1367 gen_rtx_SET (VOIDmode, t_reg,
1368 gen_rtx_fmt_ee (code, SImode,
1369 sh_compare_op0, sh_compare_op1)),
1370 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1372 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1373 gen_rtx_fmt_ee (code, SImode,
1374 sh_compare_op0, sh_compare_op1)));
1379 /* Called from the md file, set up the operands of a compare instruction. */
1382 from_compare (rtx *operands, int code)
1384 enum machine_mode mode = GET_MODE (sh_compare_op0);
1386 if (mode == VOIDmode)
1387 mode = GET_MODE (sh_compare_op1);
1390 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1392 /* Force args into regs, since we can't use constants here. */
1393 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1394 if (sh_compare_op1 != const0_rtx
1395 || code == GTU || code == GEU
1396 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1397 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1399 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1401 from_compare (operands, GT);
1402 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1405 insn = gen_rtx_SET (VOIDmode,
1406 gen_rtx_REG (SImode, T_REG),
1407 gen_rtx_fmt_ee (code, SImode,
1408 sh_compare_op0, sh_compare_op1));
1409 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1411 insn = gen_rtx_PARALLEL (VOIDmode,
1413 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1414 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1420 /* Functions to output assembly code. */
1422 /* Return a sequence of instructions to perform DI or DF move.
1424 Since the SH cannot move a DI or DF in one instruction, we have
1425 to take care when we see overlapping source and dest registers. */
1428 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1429 enum machine_mode mode)
1431 rtx dst = operands[0];
1432 rtx src = operands[1];
1434 if (GET_CODE (dst) == MEM
1435 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1436 return "mov.l %T1,%0\n\tmov.l %1,%0";
1438 if (register_operand (dst, mode)
1439 && register_operand (src, mode))
1441 if (REGNO (src) == MACH_REG)
1442 return "sts mach,%S0\n\tsts macl,%R0";
1444 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1445 when mov.d r1,r0 do r1->r0 then r2->r1. */
1447 if (REGNO (src) + 1 == REGNO (dst))
1448 return "mov %T1,%T0\n\tmov %1,%0";
1450 return "mov %1,%0\n\tmov %T1,%T0";
1452 else if (GET_CODE (src) == CONST_INT)
1454 if (INTVAL (src) < 0)
1455 output_asm_insn ("mov #-1,%S0", operands);
1457 output_asm_insn ("mov #0,%S0", operands);
1459 return "mov %1,%R0";
1461 else if (GET_CODE (src) == MEM)
1464 int dreg = REGNO (dst);
1465 rtx inside = XEXP (src, 0);
1467 switch (GET_CODE (inside))
1470 ptrreg = REGNO (inside);
1474 ptrreg = subreg_regno (inside);
1478 ptrreg = REGNO (XEXP (inside, 0));
1479 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1480 an offsettable address. Unfortunately, offsettable addresses use
1481 QImode to check the offset, and a QImode offsettable address
1482 requires r0 for the other operand, which is not currently
1483 supported, so we can't use the 'o' constraint.
1484 Thus we must check for and handle r0+REG addresses here.
1485 We punt for now, since this is likely very rare. */
1486 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1490 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1492 return "mov.l %1,%0\n\tmov.l %1,%T0";
1497 /* Work out the safe way to copy. Copy into the second half first. */
1499 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1502 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1505 /* Print an instruction which would have gone into a delay slot after
1506 another instruction, but couldn't because the other instruction expanded
1507 into a sequence where putting the slot insn at the end wouldn't work. */
1510 print_slot (rtx insn)
1512 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1514 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1518 output_far_jump (rtx insn, rtx op)
1520 struct { rtx lab, reg, op; } this;
1521 rtx braf_base_lab = NULL_RTX;
1524 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1527 this.lab = gen_label_rtx ();
1531 && offset - get_attr_length (insn) <= 32766)
1534 jump = "mov.w %O0,%1; braf %1";
1542 jump = "mov.l %O0,%1; braf %1";
1544 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1547 jump = "mov.l %O0,%1; jmp @%1";
1549 /* If we have a scratch register available, use it. */
1550 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1551 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1553 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1554 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1555 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1556 output_asm_insn (jump, &this.lab);
1557 if (dbr_sequence_length ())
1558 print_slot (final_sequence);
1560 output_asm_insn ("nop", 0);
1564 /* Output the delay slot insn first if any. */
1565 if (dbr_sequence_length ())
1566 print_slot (final_sequence);
1568 this.reg = gen_rtx_REG (SImode, 13);
1569 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1570 Fortunately, MACL is fixed and call-clobbered, and we never
1571 need its value across jumps, so save r13 in it instead of on the stack. */
1574 output_asm_insn ("lds r13, macl", 0);
1576 output_asm_insn ("mov.l r13,@-r15", 0);
1577 output_asm_insn (jump, &this.lab);
1579 output_asm_insn ("sts macl, r13", 0);
1581 output_asm_insn ("mov.l @r15+,r13", 0);
1583 if (far && flag_pic && TARGET_SH2)
1585 braf_base_lab = gen_label_rtx ();
1586 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1587 CODE_LABEL_NUMBER (braf_base_lab));
1590 output_asm_insn (".align 2", 0);
1591 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1593 if (far && flag_pic)
1596 this.lab = braf_base_lab;
1597 output_asm_insn (".long %O2-%O0", &this.lab);
1600 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1604 /* Local label counter, used for constants in the pool and inside
1605 pattern branches. */
1607 static int lf = 100;
1609 /* Output code for ordinary branches. */
1612 output_branch (int logic, rtx insn, rtx *operands)
1614 switch (get_attr_length (insn))
1617 /* This can happen if filling the delay slot has caused a forward
1618 branch to exceed its range (we could reverse it, but only
1619 when we know we won't overextend other branches; this should
1620 best be handled by relaxation).
1621 It can also happen when other condbranches hoist delay slot insn
1622 from their destination, thus leading to code size increase.
1623 But the branch will still be in the range -4092..+4098 bytes. */
1628 /* The call to print_slot will clobber the operands. */
1629 rtx op0 = operands[0];
1631 /* If the instruction in the delay slot is annulled (true), then
1632 there is no delay slot where we can put it now. The only safe
1633 place for it is after the label. final will do that by default. */
1636 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1637 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1639 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1640 ASSEMBLER_DIALECT ? "/" : ".", label);
1641 print_slot (final_sequence);
1644 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1646 output_asm_insn ("bra\t%l0", &op0);
1647 fprintf (asm_out_file, "\tnop\n");
1648 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1652 /* When relaxing, handle this like a short branch. The linker
1653 will fix it up if it still doesn't fit after relaxation. */
1655 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1657 /* These are for SH2e, in which we have to account for the
1658 extra nop because of the hardware bug in annulled branches. */
1664 gcc_assert (!final_sequence
1665 || !(INSN_ANNULLED_BRANCH_P
1666 (XVECEXP (final_sequence, 0, 0))));
1667 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1669 ASSEMBLER_DIALECT ? "/" : ".", label);
1670 fprintf (asm_out_file, "\tnop\n");
1671 output_asm_insn ("bra\t%l0", operands);
1672 fprintf (asm_out_file, "\tnop\n");
1673 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1677 /* When relaxing, fall through. */
1682 sprintf (buffer, "b%s%ss\t%%l0",
1684 ASSEMBLER_DIALECT ? "/" : ".");
1685 output_asm_insn (buffer, &operands[0]);
1690 /* There should be no longer branches now - that would
1691 indicate that something has destroyed the branches set
1692 up in machine_dependent_reorg. */
1698 output_branchy_insn (enum rtx_code code, const char *template,
1699 rtx insn, rtx *operands)
1701 rtx next_insn = NEXT_INSN (insn);
1703 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1705 rtx src = SET_SRC (PATTERN (next_insn));
1706 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1708 /* Following branch not taken */
1709 operands[9] = gen_label_rtx ();
1710 emit_label_after (operands[9], next_insn);
1711 INSN_ADDRESSES_NEW (operands[9],
1712 INSN_ADDRESSES (INSN_UID (next_insn))
1713 + get_attr_length (next_insn));
1718 int offset = (branch_dest (next_insn)
1719 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1720 if (offset >= -252 && offset <= 258)
1722 if (GET_CODE (src) == IF_THEN_ELSE)
1724 src = XEXP (src, 1);
1730 operands[9] = gen_label_rtx ();
1731 emit_label_after (operands[9], insn);
1732 INSN_ADDRESSES_NEW (operands[9],
1733 INSN_ADDRESSES (INSN_UID (insn))
1734 + get_attr_length (insn));
1739 output_ieee_ccmpeq (rtx insn, rtx *operands)
1741 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1745 /* Output the start of the assembler file. */
1748 sh_file_start (void)
1750 default_file_start ();
1753 /* Declare the .directive section before it is used. */
1754 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1755 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1759 /* We need to show the text section with the proper
1760 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1761 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1762 will complain. We can teach GAS specifically about the
1763 default attributes for our choice of text section, but
1764 then we would have to change GAS again if/when we change
1765 the text section name. */
1766 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1768 /* Switch to the data section so that the coffsem symbol
1769 isn't in the text section. */
1770 switch_to_section (data_section);
1772 if (TARGET_LITTLE_ENDIAN)
1773 fputs ("\t.little\n", asm_out_file);
1777 if (TARGET_SHCOMPACT)
1778 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1779 else if (TARGET_SHMEDIA)
1780 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1781 TARGET_SHMEDIA64 ? 64 : 32);
1785 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1788 unspec_caller_rtx_p (rtx pat)
1790 switch (GET_CODE (pat))
1793 return unspec_caller_rtx_p (XEXP (pat, 0));
1796 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1798 return unspec_caller_rtx_p (XEXP (pat, 1));
1800 if (XINT (pat, 1) == UNSPEC_CALLER)
1809 /* Indicate that INSN cannot be duplicated. This is true for insns
1810 that generate a unique label. */
1813 sh_cannot_copy_insn_p (rtx insn)
1817 if (!reload_completed || !flag_pic)
1820 if (GET_CODE (insn) != INSN)
1822 if (asm_noperands (insn) >= 0)
1825 pat = PATTERN (insn);
1826 if (GET_CODE (pat) != SET)
1828 pat = SET_SRC (pat);
1830 if (unspec_caller_rtx_p (pat))
1836 /* Actual number of instructions used to make a shift by N. */
1837 static const char ashiftrt_insns[] =
1838 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1840 /* Left shift and logical right shift are the same. */
1841 static const char shift_insns[] =
1842 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1844 /* Individual shift amounts needed to get the above length sequences.
1845 One bit right shifts clobber the T bit, so when possible, put one bit
1846 shifts in the middle of the sequence, so the ends are eligible for
1847 branch delay slots. */
1848 static const short shift_amounts[32][5] = {
1849 {0}, {1}, {2}, {2, 1},
1850 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1851 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1852 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1853 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1854 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1855 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1856 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
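/* Worked example, reading the tables above: a shift by 13 is synthesized
   as shifts by 8, 2, 1 and 2, four insns, matching shift_insns[13] == 4.
   Negative entries denote right shifts: {8, -1, 8} reaches 15 with the
   T-bit-clobbering one-bit shift kept in the middle of the sequence.  */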
1858 /* Likewise, but for shift amounts < 16, up to three highmost bits
1859 might be clobbered. This is typically used when combined with some
1860 kind of sign or zero extension. */
1862 static const char ext_shift_insns[] =
1863 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1865 static const short ext_shift_amounts[32][4] = {
1866 {0}, {1}, {2}, {2, 1},
1867 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1868 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1869 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1870 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1871 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1872 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1873 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
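/* Example: when the high bits are expendable, a shift by 6 becomes a
   left shift by 8 followed by a right shift by 2 ({8, -2}, two insns
   per ext_shift_insns[6]) instead of the exact three-insn sequence
   {2, 2, 2} that shift_amounts above requires.  */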
1875 /* Assuming we have a value that has been sign-extended by at least one bit,
1876 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1877 to shift it by N without data loss, and quicker than by other means? */
1878 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
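/* ((n) | 8) == 15 requires the low three bits of N to be set, so this
   macro accepts exactly N == 7 and N == 15.  */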
1880 /* This is used in length attributes in sh.md to help compute the length
1881 of arbitrary constant shift instructions. */
1884 shift_insns_rtx (rtx insn)
1886 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1887 int shift_count = INTVAL (XEXP (set_src, 1));
1888 enum rtx_code shift_code = GET_CODE (set_src);
1893 return ashiftrt_insns[shift_count];
1896 return shift_insns[shift_count];
1902 /* Return the cost of a shift. */
1912 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1914 if (GET_MODE (x) == DImode
1915 && GET_CODE (XEXP (x, 1)) == CONST_INT
1916 && INTVAL (XEXP (x, 1)) == 1)
1919 /* Everything else is invalid, because there is no pattern for it. */
1922 /* If the shift is by a non-constant, then it will be expensive. */
1923 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1924 return SH_DYNAMIC_SHIFT_COST;
1926 value = INTVAL (XEXP (x, 1));
1928 /* Otherwise, return the true cost in instructions. */
1929 if (GET_CODE (x) == ASHIFTRT)
1931 int cost = ashiftrt_insns[value];
1932 /* If SH3, then we put the constant in a reg and use shad. */
1933 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1934 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1938 return shift_insns[value];
1941 /* Return the cost of an AND operation. */
1948 /* ANDing with a register is a single-cycle `and' instruction. */
1949 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1952 i = INTVAL (XEXP (x, 1));
1956 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1957 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1958 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1961 return 1 + rtx_cost (XEXP (x, 1), AND);
1964 /* These constants are single cycle extu.[bw] instructions. */
1965 if (i == 0xff || i == 0xffff)
1967 /* Constants that can be used in an and immediate instruction in a single
1968 cycle, but this requires r0, so make it a little more expensive. */
1969 if (CONST_OK_FOR_K08 (i))
1971 /* Constants that can be loaded with a mov immediate and an and.
1972 This case is probably unnecessary. */
1973 if (CONST_OK_FOR_I08 (i))
1975 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1976 This case is probably unnecessary. */
1980 /* Return the cost of an addition or a subtraction. */
1985 /* Adding a register is a single cycle insn. */
1986 if (GET_CODE (XEXP (x, 1)) == REG
1987 || GET_CODE (XEXP (x, 1)) == SUBREG)
1990 /* Likewise for small constants. */
1991 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1992 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1996 switch (GET_CODE (XEXP (x, 1)))
2001 return TARGET_SHMEDIA64 ? 5 : 3;
2004 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2006 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2008 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2016 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2021 /* Return the cost of a multiply. */
2023 multcosts (rtx x ATTRIBUTE_UNUSED)
2025 if (sh_multcost >= 0)
2028 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2029 accept constants. Ideally, we would use a cost of one or two and
2030 add the cost of the operand, but disregard the latter when inside loops
2031 and loop invariant code motion is still to follow.
2032 Using a multiply first and splitting it later if it's a loss
2033 doesn't work because of different sign / zero extension semantics
2034 of multiplies vs. shifts. */
2035 return TARGET_SMALLCODE ? 2 : 3;
2039 /* We have a mul insn, so we can never take more than the mul and the
2040 read of the mac reg, but count more because of the latency and extra reg usage. */
2042 if (TARGET_SMALLCODE)
2047 /* If we're aiming at small code, then just count the number of
2048 insns in a multiply call sequence. */
2049 if (TARGET_SMALLCODE)
2052 /* Otherwise count all the insns in the routine we'd be calling too. */
2056 /* Compute a (partial) cost for rtx X. Return true if the complete
2057 cost has been computed, and false if subexpressions should be
2058 scanned. In either case, *TOTAL contains the cost result. */
2061 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2068 if (INTVAL (x) == 0)
2070 else if (outer_code == AND && and_operand ((x), DImode))
2072 else if ((outer_code == IOR || outer_code == XOR
2073 || outer_code == PLUS)
2074 && CONST_OK_FOR_I10 (INTVAL (x)))
2076 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2077 *total = COSTS_N_INSNS (outer_code != SET);
2078 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2079 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2080 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2081 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2083 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2086 if (CONST_OK_FOR_I08 (INTVAL (x)))
2088 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2089 && CONST_OK_FOR_K08 (INTVAL (x)))
2098 if (TARGET_SHMEDIA64)
2099 *total = COSTS_N_INSNS (4);
2100 else if (TARGET_SHMEDIA32)
2101 *total = COSTS_N_INSNS (2);
2108 *total = COSTS_N_INSNS (4);
      if (x == CONST0_RTX (GET_MODE (x)))
	*total = 0;
      else if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      else if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      else
	*total = COSTS_N_INSNS (3) + (outer_code != SET);
2125 *total = COSTS_N_INSNS (addsubcosts (x));
2129 *total = COSTS_N_INSNS (andcosts (x));
2133 *total = COSTS_N_INSNS (multcosts (x));
2139 *total = COSTS_N_INSNS (shiftcosts (x));
2146 *total = COSTS_N_INSNS (20);
      if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      else if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      else
	*total = COSTS_N_INSNS (3) + (outer_code != SET);
2168 /* Compute the cost of an address. For the SH, all valid addresses are
2169 the same cost. Use a slightly higher cost for reg + reg addressing,
2170 since it increases pressure on r0. */
2173 sh_address_cost (rtx X)
2175 return (GET_CODE (X) == PLUS
2176 && ! CONSTANT_P (XEXP (X, 1))
2177 && ! TARGET_SHMEDIA ? 1 : 0);
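/* (E.g. an @(r0,rn) operand is a PLUS of two registers and gets cost 1,
   while @(disp,rn) or a plain register operand gets cost 0; a sketch of
   the intent, not an exhaustive list.)  */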
2180 /* Code to expand a shift. */
2183 gen_ashift (int type, int n, rtx reg)
2185 /* Negative values here come from the shift_amounts array. */
2198 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2202 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2204 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2207 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2212 /* Same for HImode */
2215 gen_ashift_hi (int type, int n, rtx reg)
2217 /* Negative values here come from the shift_amounts array. */
2231 /* We don't have HImode right shift operations because using the
2232 ordinary 32 bit shift instructions for that doesn't generate proper
2233 zero/sign extension.
2234 gen_ashift_hi is only called in contexts where we know that the
2235 sign extension works out correctly. */
2238 if (GET_CODE (reg) == SUBREG)
2240 offset = SUBREG_BYTE (reg);
2241 reg = SUBREG_REG (reg);
2243 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2247 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2252 /* Output RTL to split a constant shift into its component SH constant
2253 shift instructions. */
2256 gen_shifty_op (int code, rtx *operands)
2258 int value = INTVAL (operands[2]);
2261 /* Truncate the shift count in case it is out of bounds. */
2262 value = value & 0x1f;
2266 if (code == LSHIFTRT)
2268 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2269 emit_insn (gen_movt (operands[0]));
2272 else if (code == ASHIFT)
2274 /* There is a two instruction sequence for 31 bit left shifts,
2275 but it requires r0. */
2276 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2278 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2279 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2284 else if (value == 0)
2286 /* This can happen even when optimizing, if there were subregs before
2287 reload. Don't output a nop here, as this is never optimized away;
2288 use a no-op move instead. */
2289 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2293 max = shift_insns[value];
2294 for (i = 0; i < max; i++)
2295 gen_ashift (code, shift_amounts[value][i], operands[0]);
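/* Example of the decomposition above (illustrative): a constant left
   shift by 10 becomes the two single-cycle instructions
	shll8	rn
	shll2	rn
   with the amounts 8 and 2 coming from the shift_amounts table.  */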
/* Same as above, but optimized for values where the topmost bits don't
   matter. */
2302 gen_shifty_hi_op (int code, rtx *operands)
2304 int value = INTVAL (operands[2]);
2306 void (*gen_fun) (int, int, rtx);
2308 /* This operation is used by and_shl for SImode values with a few
2309 high bits known to be cleared. */
2313 emit_insn (gen_nop ());
2317 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2320 max = ext_shift_insns[value];
2321 for (i = 0; i < max; i++)
2322 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2325 /* When shifting right, emit the shifts in reverse order, so that
2326 solitary negative values come first. */
2327 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2328 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2331 /* Output RTL for an arithmetic right shift. */
2333 /* ??? Rewrite to use super-optimizer sequences. */
2336 expand_ashiftrt (rtx *operands)
2344 if (GET_CODE (operands[2]) != CONST_INT)
2346 rtx count = copy_to_mode_reg (SImode, operands[2]);
2347 emit_insn (gen_negsi2 (count, count));
2348 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2351 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2352 > 1 + SH_DYNAMIC_SHIFT_COST)
2355 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2356 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2360 if (GET_CODE (operands[2]) != CONST_INT)
2363 value = INTVAL (operands[2]) & 31;
      /* If we are called from abs expansion, arrange things so that we
	 can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero. */
2370 if (currently_expanding_to_rtl)
2372 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2374 emit_insn (gen_mov_neg_si_t (operands[0]));
2377 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2380 else if (value >= 16 && value <= 19)
2382 wrk = gen_reg_rtx (SImode);
2383 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2386 gen_ashift (ASHIFTRT, 1, wrk);
2387 emit_move_insn (operands[0], wrk);
  /* Expand a short sequence inline; for a longer one, call a magic routine. */
2391 else if (value <= 5)
2393 wrk = gen_reg_rtx (SImode);
2394 emit_move_insn (wrk, operands[1]);
2396 gen_ashift (ASHIFTRT, 1, wrk);
2397 emit_move_insn (operands[0], wrk);
2401 wrk = gen_reg_rtx (Pmode);
2403 /* Load the value into an arg reg and call a helper. */
2404 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2405 sprintf (func, "__ashiftrt_r4_%d", value);
2406 function_symbol (wrk, func, SFUNC_STATIC);
2407 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2408 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
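/* (The __ashiftrt_r4_<n> helpers take their argument in r4 and leave
   the shifted result in r4, which is why the sequence above moves the
   operand into r4 and copies r4 back out afterwards.)  */

/* Return nonzero when it is cheaper to load the shift count into a
   register and do a dynamic shift than to expand the constant shift
   as a sequence of single shift instructions; the extra 1 accounts
   for loading the count.  */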
2413 sh_dynamicalize_shift_p (rtx count)
2415 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2418 /* Try to find a good way to implement the combiner pattern
2419 [(set (match_operand:SI 0 "register_operand" "r")
2420 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2421 (match_operand:SI 2 "const_int_operand" "n"))
   (match_operand:SI 3 "const_int_operand" "n"))).
2423 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2424 return 0 for simple right / left or left/right shift combination.
2425 return 1 for a combination of shifts with zero_extend.
2426 return 2 for a combination of shifts with an AND that needs r0.
2427 return 3 for a combination of shifts with an AND that needs an extra
2428 scratch register, when the three highmost bits of the AND mask are clear.
2429 return 4 for a combination of shifts with an AND that needs an extra
   scratch register, when any of the three highmost bits of the AND mask
   is set.
2432 If ATTRP is set, store an initial right shift width in ATTRP[0],
   and the instruction length in ATTRP[1]. These values are not valid
   when returning 0.
2435 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
   shift_amounts for the last shift value that is to be used before the
   zero extend. */
2439 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2441 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2442 int left = INTVAL (left_rtx), right;
2444 int cost, best_cost = 10000;
2445 int best_right = 0, best_len = 0;
2449 if (left < 0 || left > 31)
2451 if (GET_CODE (mask_rtx) == CONST_INT)
2452 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2454 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2455 /* Can this be expressed as a right shift / left shift pair? */
2456 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2457 right = exact_log2 (lsb);
2458 mask2 = ~(mask + lsb - 1);
2459 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
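  /* Worked example (a sketch): for a mask that just clears the low bit
     (mask == ~1), lsb == 2 and right == 1, and mask2 == 0, so the AND
     can be done as a right shift by 1 followed by a left shift -- the
     simple shift pair of the first case below.  */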
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
2463 /* mask has no trailing zeroes <==> ! right */
2464 else if (! right && mask2 == ~(lsb2 - 1))
2466 int late_right = exact_log2 (lsb2);
2467 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2469 /* Try to use zero extend. */
2470 if (mask2 == ~(lsb2 - 1))
2474 for (width = 8; width <= 16; width += 8)
2476 /* Can we zero-extend right away? */
2477 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2480 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2481 if (cost < best_cost)
2492 /* ??? Could try to put zero extend into initial right shift,
2493 or even shift a bit left before the right shift. */
2494 /* Determine value of first part of left shift, to get to the
2495 zero extend cut-off point. */
2496 first = width - exact_log2 (lsb2) + right;
2497 if (first >= 0 && right + left - first >= 0)
2499 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2500 + ext_shift_insns[right + left - first];
2501 if (cost < best_cost)
  /* Try to use the r0 AND pattern. */
2514 for (i = 0; i <= 2; i++)
2518 if (! CONST_OK_FOR_K08 (mask >> i))
2520 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2521 if (cost < best_cost)
2526 best_len = cost - 1;
2529 /* Try to use a scratch register to hold the AND operand. */
2530 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2531 for (i = 0; i <= 2; i++)
2535 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2536 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2537 if (cost < best_cost)
2542 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2548 attrp[0] = best_right;
2549 attrp[1] = best_len;
/* This is used in the length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2. */
2557 shl_and_length (rtx insn)
2559 rtx set_src, left_rtx, mask_rtx;
2562 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2563 left_rtx = XEXP (XEXP (set_src, 0), 1);
2564 mask_rtx = XEXP (set_src, 1);
2565 shl_and_kind (left_rtx, mask_rtx, attributes);
2566 return attributes[1];
/* This is used in the length attribute of the and_shl_scratch instruction. */
2572 shl_and_scr_length (rtx insn)
2574 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2575 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2576 rtx op = XEXP (set_src, 0);
2577 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2578 op = XEXP (XEXP (op, 0), 0);
2579 return len + shift_insns[INTVAL (XEXP (op, 1))];
2582 /* Generate rtl for instructions for which shl_and_kind advised a particular
2583 method of generating them, i.e. returned zero. */
2586 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2589 unsigned HOST_WIDE_INT mask;
2590 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2591 int right, total_shift;
2592 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2594 right = attributes[0];
2595 total_shift = INTVAL (left_rtx) + right;
2596 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2603 int first = attributes[2];
2608 emit_insn ((mask << right) <= 0xff
2609 ? gen_zero_extendqisi2 (dest,
2610 gen_lowpart (QImode, source))
2611 : gen_zero_extendhisi2 (dest,
2612 gen_lowpart (HImode, source)));
2616 emit_insn (gen_movsi (dest, source));
2620 operands[2] = GEN_INT (right);
2621 gen_shifty_hi_op (LSHIFTRT, operands);
2625 operands[2] = GEN_INT (first);
2626 gen_shifty_hi_op (ASHIFT, operands);
2627 total_shift -= first;
2631 emit_insn (mask <= 0xff
2632 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2633 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2634 if (total_shift > 0)
2636 operands[2] = GEN_INT (total_shift);
2637 gen_shifty_hi_op (ASHIFT, operands);
2642 shift_gen_fun = gen_shifty_op;
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter. This way, we might be able to get a shorter
	 signed constant. */
2647 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2648 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2650 /* Don't expand fine-grained when combining, because that will
2651 make the pattern fail. */
2652 if (currently_expanding_to_rtl
2653 || reload_in_progress || reload_completed)
2657 /* Cases 3 and 4 should be handled by this split
	 only while combining. */
2659 gcc_assert (kind <= 2);
2662 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2665 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2670 operands[2] = GEN_INT (total_shift);
2671 shift_gen_fun (ASHIFT, operands);
2678 if (kind != 4 && total_shift < 16)
2680 neg = -ext_shift_amounts[total_shift][1];
2682 neg -= ext_shift_amounts[total_shift][2];
2686 emit_insn (gen_and_shl_scratch (dest, source,
2689 GEN_INT (total_shift + neg),
2691 emit_insn (gen_movsi (dest, dest));
2698 /* Try to find a good way to implement the combiner pattern
2699 [(set (match_operand:SI 0 "register_operand" "=r")
2700 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2701 (match_operand:SI 2 "const_int_operand" "n")
2702 (match_operand:SI 3 "const_int_operand" "n")
2704 (clobber (reg:SI T_REG))]
2705 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2706 return 0 for simple left / right shift combination.
2707 return 1 for left shift / 8 bit sign extend / left shift.
2708 return 2 for left shift / 16 bit sign extend / left shift.
2709 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2710 return 4 for left shift / 16 bit sign extend / shift / sign extend.
   return 5 for left shift / 16 bit sign extend / right shift.
2712 return 6 for < 8 bit sign extend / left shift.
2713 return 7 for < 8 bit sign extend / left shift / single right shift.
2714 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2717 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2719 int left, size, insize, ext;
2720 int cost = 0, best_cost;
2723 left = INTVAL (left_rtx);
2724 size = INTVAL (size_rtx);
2725 insize = size - left;
2726 gcc_assert (insize > 0);
2727 /* Default to left / right shift. */
2729 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2732 /* 16 bit shift / sign extend / 16 bit shift */
2733 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2734 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2735 below, by alternative 3 or something even better. */
2736 if (cost < best_cost)
2742 /* Try a plain sign extend between two shifts. */
2743 for (ext = 16; ext >= insize; ext -= 8)
2747 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2748 if (cost < best_cost)
2750 kind = ext / (unsigned) 8;
2754 /* Check if we can do a sloppy shift with a final signed shift
2755 restoring the sign. */
2756 if (EXT_SHIFT_SIGNED (size - ext))
2757 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2758 /* If not, maybe it's still cheaper to do the second shift sloppy,
2759 and do a final sign extend? */
2760 else if (size <= 16)
2761 cost = ext_shift_insns[ext - insize] + 1
2762 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2765 if (cost < best_cost)
2767 kind = ext / (unsigned) 8 + 2;
  /* Check if we can sign extend in r0. */
2774 cost = 3 + shift_insns[left];
2775 if (cost < best_cost)
2780 /* Try the same with a final signed shift. */
2783 cost = 3 + ext_shift_insns[left + 1] + 1;
2784 if (cost < best_cost)
2793 /* Try to use a dynamic shift. */
2794 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2795 if (cost < best_cost)
2806 /* Function to be used in the length attribute of the instructions
2807 implementing this pattern. */
2810 shl_sext_length (rtx insn)
2812 rtx set_src, left_rtx, size_rtx;
2815 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2816 left_rtx = XEXP (XEXP (set_src, 0), 1);
2817 size_rtx = XEXP (set_src, 1);
2818 shl_sext_kind (left_rtx, size_rtx, &cost);
/* Generate rtl for this pattern. */
2825 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2828 int left, size, insize, cost;
2831 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2832 left = INTVAL (left_rtx);
2833 size = INTVAL (size_rtx);
2834 insize = size - left;
2842 int ext = kind & 1 ? 8 : 16;
2843 int shift2 = size - ext;
2845 /* Don't expand fine-grained when combining, because that will
2846 make the pattern fail. */
2847 if (! currently_expanding_to_rtl
2848 && ! reload_in_progress && ! reload_completed)
2850 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2851 emit_insn (gen_movsi (dest, source));
2855 emit_insn (gen_movsi (dest, source));
2859 operands[2] = GEN_INT (ext - insize);
2860 gen_shifty_hi_op (ASHIFT, operands);
2863 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2864 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2869 operands[2] = GEN_INT (shift2);
2870 gen_shifty_op (ASHIFT, operands);
2877 if (EXT_SHIFT_SIGNED (shift2))
2879 operands[2] = GEN_INT (shift2 + 1);
2880 gen_shifty_op (ASHIFT, operands);
2881 operands[2] = const1_rtx;
2882 gen_shifty_op (ASHIFTRT, operands);
2885 operands[2] = GEN_INT (shift2);
2886 gen_shifty_hi_op (ASHIFT, operands);
2890 operands[2] = GEN_INT (-shift2);
2891 gen_shifty_hi_op (LSHIFTRT, operands);
2893 emit_insn (size <= 8
2894 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2895 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2902 if (! currently_expanding_to_rtl
2903 && ! reload_in_progress && ! reload_completed)
2904 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2908 operands[2] = GEN_INT (16 - insize);
2909 gen_shifty_hi_op (ASHIFT, operands);
2910 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2912 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2914 gen_ashift (ASHIFTRT, 1, dest);
2919 /* Don't expand fine-grained when combining, because that will
2920 make the pattern fail. */
2921 if (! currently_expanding_to_rtl
2922 && ! reload_in_progress && ! reload_completed)
2924 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2925 emit_insn (gen_movsi (dest, source));
2928 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2929 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2930 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
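      /* The AND/XOR/ADD triple above is the usual trick for sign
	 extending from INSIZE bits without a dedicated instruction:
	 e.g. for insize == 4, ((x & 0xf) ^ 0x8) - 0x8 maps 0x0..0x7
	 to themselves and 0x8..0xf to -8..-1.  */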
2932 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2933 gen_shifty_op (ASHIFT, operands);
2935 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2943 /* Prefix a symbol_ref name with "datalabel". */
2946 gen_datalabel_ref (rtx sym)
2950 if (GET_CODE (sym) == LABEL_REF)
2951 return gen_rtx_CONST (GET_MODE (sym),
2952 gen_rtx_UNSPEC (GET_MODE (sym),
2956 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2958 str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse. */
2961 str = IDENTIFIER_POINTER (get_identifier (str));
2962 XSTR (sym, 0) = str;
2968 static alloc_pool label_ref_list_pool;
typedef struct label_ref_list_d
{
  rtx label;
  struct label_ref_list_d *next;
} *label_ref_list_t;
2976 /* The SH cannot load a large constant into a register, constants have to
2977 come from a pc relative load. The reference of a pc relative load
2978 instruction must be less than 1k in front of the instruction. This
2979 means that we often have to dump a constant inside a function, and
2980 generate code to branch around it.
2982 It is important to minimize this, since the branches will slow things
2983 down and make things bigger.
2985 Worst case code looks like:
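   (A sketch of the kind of sequence meant here; labels and registers
   are illustrative:)

	mov.l	L1,rn
	bra	L2
	nop
	align
   L1:	.long value
   L2:
	...

	mov.l	L3,rn
	bra	L4
	nop
	align
   L3:	.long value
   L4:
	...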
3003 We fix this by performing a scan before scheduling, which notices which
3004 instructions need to have their operands fetched from the constant table
3005 and builds the table.
   The algorithm is:

   scan, find an instruction which needs a pcrel move. Look forward, find the
3010 last barrier which is within MAX_COUNT bytes of the requirement.
3011 If there isn't one, make one. Process all the instructions between
3012 the find and the barrier.
3014 In the above example, we can tell that L3 is within 1k of L1, so
3015 the first move can be shrunk from the 3 insn+constant sequence into
3016 just 1 insn, and the constant moved to L3 to make:
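   (Again a sketch; the first load now reaches the pool at L3 directly:)

	mov.l	L3,rn
   L2:
	...
	mov.l	L3,rm
	bra	L4
	nop
	align
   L3:	.long value
   L4: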
3027 Then the second move becomes the target for the shortening process. */
typedef struct
{
  rtx value;			/* Value in table. */
3032 rtx label; /* Label of value. */
3033 label_ref_list_t wend; /* End of window. */
3034 enum machine_mode mode; /* Mode of value. */
3036 /* True if this constant is accessed as part of a post-increment
3037 sequence. Note that HImode constants are never accessed in this way. */
  bool part_of_sequence_p;
} pool_node;
3041 /* The maximum number of constants that can fit into one pool, since
3042 constants in the range 0..510 are at least 2 bytes long, and in the
3043 range from there to 1018 at least 4 bytes. */
3045 #define MAX_POOL_SIZE 372
3046 static pool_node pool_vector[MAX_POOL_SIZE];
3047 static int pool_size;
3048 static rtx pool_window_label;
3049 static int pool_window_last;
3051 /* ??? If we need a constant in HImode which is the truncated value of a
3052 constant we need in SImode, we could combine the two entries thus saving
   two bytes. Is this common enough to be worth the effort of implementing
   this? */
3056 /* ??? This stuff should be done at the same time that we shorten branches.
3057 As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary. */
3061 /* Add a constant to the pool and return its label. */
3064 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3068 label_ref_list_t ref, newref;
3070 /* First see if we've already got it. */
3071 for (i = 0; i < pool_size; i++)
3073 if (x->code == pool_vector[i].value->code
3074 && mode == pool_vector[i].mode)
3076 if (x->code == CODE_LABEL)
3078 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3081 if (rtx_equal_p (x, pool_vector[i].value))
3086 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3088 new = gen_label_rtx ();
3089 LABEL_REFS (new) = pool_vector[i].label;
3090 pool_vector[i].label = lab = new;
3092 if (lab && pool_window_label)
3094 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3095 newref->label = pool_window_label;
3096 ref = pool_vector[pool_window_last].wend;
3098 pool_vector[pool_window_last].wend = newref;
3101 pool_window_label = new;
3102 pool_window_last = i;
3108 /* Need a new one. */
3109 pool_vector[pool_size].value = x;
3110 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3113 pool_vector[pool_size - 1].part_of_sequence_p = true;
3116 lab = gen_label_rtx ();
3117 pool_vector[pool_size].mode = mode;
3118 pool_vector[pool_size].label = lab;
3119 pool_vector[pool_size].wend = NULL;
3120 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3121 if (lab && pool_window_label)
3123 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3124 newref->label = pool_window_label;
3125 ref = pool_vector[pool_window_last].wend;
3127 pool_vector[pool_window_last].wend = newref;
3130 pool_window_label = lab;
3131 pool_window_last = pool_size;
/* Output the literal table. START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
   these insns at a 4-byte aligned position. BARRIER is the barrier
   after which we are to place the table. */
3143 dump_table (rtx start, rtx barrier)
3149 label_ref_list_t ref;
3152 /* Do two passes, first time dump out the HI sized constants. */
3154 for (i = 0; i < pool_size; i++)
3156 pool_node *p = &pool_vector[i];
3158 if (p->mode == HImode)
3162 scan = emit_insn_after (gen_align_2 (), scan);
3165 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3166 scan = emit_label_after (lab, scan);
3167 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3169 for (ref = p->wend; ref; ref = ref->next)
3172 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3175 else if (p->mode == DFmode)
3183 scan = emit_insn_after (gen_align_4 (), scan);
3185 for (; start != barrier; start = NEXT_INSN (start))
3186 if (GET_CODE (start) == INSN
3187 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3189 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3190 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3192 scan = emit_label_after (lab, scan);
3195 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3197 rtx align_insn = NULL_RTX;
3199 scan = emit_label_after (gen_label_rtx (), scan);
3200 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3203 for (i = 0; i < pool_size; i++)
3205 pool_node *p = &pool_vector[i];
3213 if (align_insn && !p->part_of_sequence_p)
3215 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3216 emit_label_before (lab, align_insn);
3217 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3219 for (ref = p->wend; ref; ref = ref->next)
3222 emit_insn_before (gen_consttable_window_end (lab),
3225 delete_insn (align_insn);
3226 align_insn = NULL_RTX;
3231 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3232 scan = emit_label_after (lab, scan);
3233 scan = emit_insn_after (gen_consttable_4 (p->value,
3235 need_align = ! need_align;
3241 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3246 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3247 scan = emit_label_after (lab, scan);
3248 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3255 if (p->mode != HImode)
3257 for (ref = p->wend; ref; ref = ref->next)
3260 scan = emit_insn_after (gen_consttable_window_end (lab),
3269 for (i = 0; i < pool_size; i++)
3271 pool_node *p = &pool_vector[i];
3282 scan = emit_label_after (gen_label_rtx (), scan);
3283 scan = emit_insn_after (gen_align_4 (), scan);
3285 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3286 scan = emit_label_after (lab, scan);
3287 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3295 scan = emit_label_after (gen_label_rtx (), scan);
3296 scan = emit_insn_after (gen_align_4 (), scan);
3298 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3299 scan = emit_label_after (lab, scan);
3300 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3307 if (p->mode != HImode)
3309 for (ref = p->wend; ref; ref = ref->next)
3312 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3317 scan = emit_insn_after (gen_consttable_end (), scan);
3318 scan = emit_barrier_after (scan);
3320 pool_window_label = NULL_RTX;
3321 pool_window_last = 0;
3324 /* Return nonzero if constant would be an ok source for a
3325 mov.w instead of a mov.l. */
3330 return (GET_CODE (src) == CONST_INT
3331 && INTVAL (src) >= -32768
3332 && INTVAL (src) <= 32767);
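/* (E.g. 32767 qualifies, but 32768 does not: mov.w sign-extends its
   16 bit operand, so it would load 0xffff8000 instead.)  */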
3335 /* Nonzero if the insn is a move instruction which needs to be fixed. */
/* ??? For a DImode/DFmode move, we don't need to fix it if each half of the
3338 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3339 need to fix it if the input value is CONST_OK_FOR_I08. */
3342 broken_move (rtx insn)
3344 if (GET_CODE (insn) == INSN)
3346 rtx pat = PATTERN (insn);
3347 if (GET_CODE (pat) == PARALLEL)
3348 pat = XVECEXP (pat, 0, 0);
3349 if (GET_CODE (pat) == SET
3350 /* We can load any 8 bit value if we don't care what the high
3351 order bits end up as. */
3352 && GET_MODE (SET_DEST (pat)) != QImode
3353 && (CONSTANT_P (SET_SRC (pat))
3354 /* Match mova_const. */
3355 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3356 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3357 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3359 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3360 && (fp_zero_operand (SET_SRC (pat))
3361 || fp_one_operand (SET_SRC (pat)))
3362 /* ??? If this is a -m4 or -m4-single compilation, in general
3363 we don't know the current setting of fpscr, so disable fldi.
3364 There is an exception if this was a register-register move
3365 before reload - and hence it was ascertained that we have
3366 single precision setting - and in a post-reload optimization
3367 we changed this to do a constant load. In that case
3368 we don't have an r0 clobber, hence we must use fldi. */
3369 && (! TARGET_SH4 || TARGET_FMOVD
3370 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3372 && GET_CODE (SET_DEST (pat)) == REG
3373 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3375 && GET_MODE (SET_DEST (pat)) == SImode
3376 && GET_CODE (SET_SRC (pat)) == CONST_INT
3377 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3378 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3379 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3389 return (GET_CODE (insn) == INSN
3390 && GET_CODE (PATTERN (insn)) == SET
3391 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3392 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3393 /* Don't match mova_const. */
3394 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3397 /* Fix up a mova from a switch that went out of range. */
3399 fixup_mova (rtx mova)
3403 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3404 INSN_CODE (mova) = -1;
3409 rtx lab = gen_label_rtx ();
3410 rtx wpat, wpat0, wpat1, wsrc, diff;
      do
	{
	  worker = NEXT_INSN (worker);
	  gcc_assert (worker
		      && GET_CODE (worker) != CODE_LABEL
		      && GET_CODE (worker) != JUMP_INSN);
	} while (GET_CODE (worker) == NOTE
		 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3420 wpat = PATTERN (worker);
3421 wpat0 = XVECEXP (wpat, 0, 0);
3422 wpat1 = XVECEXP (wpat, 0, 1);
3423 wsrc = SET_SRC (wpat0);
3424 PATTERN (worker) = (gen_casesi_worker_2
3425 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3426 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3428 INSN_CODE (worker) = -1;
3429 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3430 gen_rtx_LABEL_REF (Pmode, lab));
3431 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3432 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3433 INSN_CODE (mova) = -1;
3437 /* Find the last barrier from insn FROM which is close enough to hold the
   constant pool. If we can't find one, then create one near the end of
   the range. */
3442 find_barrier (int num_mova, rtx mova, rtx from)
3451 int leading_mova = num_mova;
3452 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3456 /* For HImode: range is 510, add 4 because pc counts from address of
3457 second instruction after this one, subtract 2 for the jump instruction
3458 that we may need to emit before the table, subtract 2 for the instruction
3459 that fills the jump delay slot (in very rare cases, reorg will take an
3460 instruction from after the constant pool or will leave the delay slot
3461 empty). This gives 510.
3462 For SImode: range is 1020, add 4 because pc counts from address of
3463 second instruction after this one, subtract 2 in case pc is 2 byte
3464 aligned, subtract 2 for the jump instruction that we may need to emit
3465 before the table, subtract 2 for the instruction that fills the jump
3466 delay slot. This gives 1018. */
3468 /* The branch will always be shortened now that the reference address for
3469 forward branches is the successor address, thus we need no longer make
3470 adjustments to the [sh]i_limit for -O0. */
3475 while (from && count_si < si_limit && count_hi < hi_limit)
3477 int inc = get_attr_length (from);
3480 if (GET_CODE (from) == CODE_LABEL)
3483 new_align = 1 << label_to_alignment (from);
3484 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3485 new_align = 1 << barrier_align (from);
3491 if (GET_CODE (from) == BARRIER)
3494 found_barrier = from;
3496 /* If we are at the end of the function, or in front of an alignment
3497 instruction, we need not insert an extra alignment. We prefer
3498 this kind of barrier. */
3499 if (barrier_align (from) > 2)
3500 good_barrier = from;
3503 if (broken_move (from))
3506 enum machine_mode mode;
3508 pat = PATTERN (from);
3509 if (GET_CODE (pat) == PARALLEL)
3510 pat = XVECEXP (pat, 0, 0);
3511 src = SET_SRC (pat);
3512 dst = SET_DEST (pat);
3513 mode = GET_MODE (dst);
3515 /* We must explicitly check the mode, because sometimes the
3516 front end will generate code to load unsigned constants into
3517 HImode targets without properly sign extending them. */
3519 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3522 /* We put the short constants before the long constants, so
3523 we must count the length of short constants in the range
3524 for the long constants. */
3525 /* ??? This isn't optimal, but is easy to do. */
3530 /* We dump DF/DI constants before SF/SI ones, because
3531 the limit is the same, but the alignment requirements
3532 are higher. We may waste up to 4 additional bytes
3533 for alignment, and the DF/DI constant may have
3534 another SF/SI constant placed before it. */
3535 if (TARGET_SHCOMPACT
3537 && (mode == DFmode || mode == DImode))
3542 while (si_align > 2 && found_si + si_align - 2 > count_si)
3544 if (found_si > count_si)
3545 count_si = found_si;
3546 found_si += GET_MODE_SIZE (mode);
3548 si_limit -= GET_MODE_SIZE (mode);
3558 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3560 if (found_si > count_si)
3561 count_si = found_si;
3563 else if (GET_CODE (from) == JUMP_INSN
3564 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3565 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3569 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3571 /* We have just passed the barrier in front of the
3572 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3573 the ADDR_DIFF_VEC is accessed as data, just like our pool
3574 constants, this is a good opportunity to accommodate what
3575 we have gathered so far.
3576 If we waited any longer, we could end up at a barrier in
3577 front of code, which gives worse cache usage for separated
3578 instruction / data caches. */
3579 good_barrier = found_barrier;
3584 rtx body = PATTERN (from);
3585 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3588 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3589 else if (GET_CODE (from) == JUMP_INSN
3591 && ! TARGET_SMALLCODE)
3597 if (new_align > si_align)
3599 si_limit -= (count_si - 1) & (new_align - si_align);
3600 si_align = new_align;
3602 count_si = (count_si + new_align - 1) & -new_align;
3607 if (new_align > hi_align)
3609 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3610 hi_align = new_align;
3612 count_hi = (count_hi + new_align - 1) & -new_align;
3614 from = NEXT_INSN (from);
3621 /* Try as we might, the leading mova is out of range. Change
3622 it into a load (which will become a pcload) and retry. */
3624 return find_barrier (0, 0, mova);
3628 /* Insert the constant pool table before the mova instruction,
3629 to prevent the mova label reference from going out of range. */
3631 good_barrier = found_barrier = barrier_before_mova;
3637 if (good_barrier && next_real_insn (found_barrier))
3638 found_barrier = good_barrier;
3642 /* We didn't find a barrier in time to dump our stuff,
3643 so we'll make one. */
3644 rtx label = gen_label_rtx ();
3646 /* If we exceeded the range, then we must back up over the last
3647 instruction we looked at. Otherwise, we just need to undo the
3648 NEXT_INSN at the end of the loop. */
3649 if (count_hi > hi_limit || count_si > si_limit)
3650 from = PREV_INSN (PREV_INSN (from));
3652 from = PREV_INSN (from);
      /* Walk back to be just before any jump or label.
	 Putting it before a label reduces the number of times the branch
	 around the constant pool table will be hit. Putting it before
	 a jump makes it more likely that the bra delay slot will be
	 filled. */
3659 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3660 || GET_CODE (from) == CODE_LABEL)
3661 from = PREV_INSN (from);
3663 from = emit_jump_insn_after (gen_jump (label), from);
3664 JUMP_LABEL (from) = label;
3665 LABEL_NUSES (label) = 1;
3666 found_barrier = emit_barrier_after (from);
3667 emit_label_after (label, found_barrier);
3670 return found_barrier;
3673 /* If the instruction INSN is implemented by a special function, and we can
3674 positively find the register that is used to call the sfunc, and this
3675 register is not used anywhere else in this instruction - except as the
3676 destination of a set, return this register; else, return 0. */
3678 sfunc_uses_reg (rtx insn)
3681 rtx pattern, part, reg_part, reg;
3683 if (GET_CODE (insn) != INSN)
3685 pattern = PATTERN (insn);
3686 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3689 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3691 part = XVECEXP (pattern, 0, i);
3692 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3697 reg = XEXP (reg_part, 0);
3698 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3700 part = XVECEXP (pattern, 0, i);
3701 if (part == reg_part || GET_CODE (part) == CLOBBER)
3703 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3704 && GET_CODE (SET_DEST (part)) == REG)
3705 ? SET_SRC (part) : part)))
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it. Set *SET to a SET rtx if the register
   is set by INSN. */
3716 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3722 reg2 = sfunc_uses_reg (insn);
3723 if (reg2 && REGNO (reg2) == REGNO (reg))
3725 pattern = single_set (insn);
3727 && GET_CODE (SET_DEST (pattern)) == REG
3728 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3732 if (GET_CODE (insn) != CALL_INSN)
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different. */
3736 pattern = single_set (insn);
3738 && GET_CODE (SET_DEST (pattern)) == REG
3739 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3745 par = PATTERN (insn);
3746 if (GET_CODE (par) == PARALLEL)
3747 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3749 part = XVECEXP (par, 0, i);
3750 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3753 return reg_mentioned_p (reg, SET_SRC (pattern));
3759 pattern = PATTERN (insn);
3761 if (GET_CODE (pattern) == PARALLEL)
3765 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3766 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3768 pattern = XVECEXP (pattern, 0, 0);
3771 if (GET_CODE (pattern) == SET)
3773 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3775 /* We don't use rtx_equal_p, because we don't care if the
3776 mode is different. */
3777 if (GET_CODE (SET_DEST (pattern)) != REG
3778 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3784 pattern = SET_SRC (pattern);
3787 if (GET_CODE (pattern) != CALL
3788 || GET_CODE (XEXP (pattern, 0)) != MEM
3789 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3795 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3796 general registers. Bits 0..15 mean that the respective registers
3797 are used as inputs in the instruction. Bits 16..31 mean that the
3798 registers 0..15, respectively, are used as outputs, or are clobbered.
3799 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3801 regs_used (rtx x, int is_dest)
3809 code = GET_CODE (x);
3814 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3815 << (REGNO (x) + is_dest));
3819 rtx y = SUBREG_REG (x);
3821 if (GET_CODE (y) != REG)
3824 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3826 subreg_regno_offset (REGNO (y),
3829 GET_MODE (x)) + is_dest));
3833 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
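      /* Example (illustrative): for (set (reg:DI 2) (reg:SI 5)) this
	 returns bits 18 and 19 set (r2/r3 written) together with bit 5
	 (r5 read).  */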
3835 /* If there was a return value, it must have been indicated with USE. */
3850 fmt = GET_RTX_FORMAT (code);
3852 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3857 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3858 used |= regs_used (XVECEXP (x, i, j), is_dest);
3860 else if (fmt[i] == 'e')
3861 used |= regs_used (XEXP (x, i), is_dest);
3866 /* Create an instruction that prevents redirection of a conditional branch
3867 to the destination of the JUMP with address ADDR.
3868 If the branch needs to be implemented as an indirect jump, try to find
3869 a scratch register for it.
3870 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3871 If any preceding insn that doesn't fit into a delay slot is good enough,
3872 pass 1. Pass 2 if a definite blocking insn is needed.
3873 -1 is used internally to avoid deep recursion.
3874 If a blocking instruction is made or recognized, return it. */
3877 gen_block_redirect (rtx jump, int addr, int need_block)
3880 rtx prev = prev_nonnote_insn (jump);
3883 /* First, check if we already have an instruction that satisfies our need. */
3884 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3886 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3888 if (GET_CODE (PATTERN (prev)) == USE
3889 || GET_CODE (PATTERN (prev)) == CLOBBER
3890 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3892 else if ((need_block &= ~1) < 0)
3894 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3897 if (GET_CODE (PATTERN (jump)) == RETURN)
3901 /* Reorg even does nasty things with return insns that cause branches
3902 to go out of range - see find_end_label and callers. */
3903 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3905 /* We can't use JUMP_LABEL here because it might be undefined
3906 when not optimizing. */
3907 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3908 /* If the branch is out of range, try to find a scratch register for it. */
3910 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3914 /* Don't look for the stack pointer as a scratch register,
3915 it would cause trouble if an interrupt occurred. */
3916 unsigned try = 0x7fff, used;
3917 int jump_left = flag_expensive_optimizations + 1;
3919 /* It is likely that the most recent eligible instruction is wanted for
3920 the delay slot. Therefore, find out which registers it uses, and
3921 try to avoid using them. */
3923 for (scan = jump; (scan = PREV_INSN (scan)); )
3927 if (INSN_DELETED_P (scan))
3929 code = GET_CODE (scan);
3930 if (code == CODE_LABEL || code == JUMP_INSN)
3933 && GET_CODE (PATTERN (scan)) != USE
3934 && GET_CODE (PATTERN (scan)) != CLOBBER
3935 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3937 try &= ~regs_used (PATTERN (scan), 0);
3941 for (used = dead = 0, scan = JUMP_LABEL (jump);
3942 (scan = NEXT_INSN (scan)); )
3946 if (INSN_DELETED_P (scan))
3948 code = GET_CODE (scan);
3951 used |= regs_used (PATTERN (scan), 0);
3952 if (code == CALL_INSN)
3953 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3954 dead |= (used >> 16) & ~used;
3960 if (code == JUMP_INSN)
3962 if (jump_left-- && simplejump_p (scan))
3963 scan = JUMP_LABEL (scan);
3969 /* Mask out the stack pointer again, in case it was
3970 the only 'free' register we have found. */
3973 /* If the immediate destination is still in range, check for possible
3974 threading with a jump beyond the delay slot insn.
3975 Don't check if we are called recursively; the jump has been or will be
3976 checked in a different invocation then. */
3978 else if (optimize && need_block >= 0)
3980 rtx next = next_active_insn (next_active_insn (dest));
3981 if (next && GET_CODE (next) == JUMP_INSN
3982 && GET_CODE (PATTERN (next)) == SET
3983 && recog_memoized (next) == CODE_FOR_jump_compact)
3985 dest = JUMP_LABEL (next);
3987 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3989 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3995 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3997 /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, and thus expose all constituent
3999 instructions to further optimization. However, reorg uses
4000 simplejump_p to determine if there is an unconditional jump where
4001 it should try to schedule instructions from the target of the
4002 branch; simplejump_p fails for indirect jumps even if they have
4004 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4005 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4007 /* ??? We would like this to have the scope of the jump, but that
4008 scope will change when a delay slot insn of an inner scope is added.
4009 Hence, after delay slot scheduling, we'll have to expect
4010 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4013 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4014 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4017 else if (need_block)
4018 /* We can't use JUMP_LABEL here because it might be undefined
4019 when not optimizing. */
4020 return emit_insn_before (gen_block_branch_redirect
4021 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4026 #define CONDJUMP_MIN -252
4027 #define CONDJUMP_MAX 262
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination. */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump. */
  rtx insert_place;
  /* The ultimate destination. */
  rtx far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user. */
  int address;
};
4044 static void gen_far_branch (struct far_branch *);
4045 enum mdep_reorg_phase_e mdep_reorg_phase;
4047 gen_far_branch (struct far_branch *bp)
4049 rtx insn = bp->insert_place;
4051 rtx label = gen_label_rtx ();
4054 emit_label_after (label, insn);
4057 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4058 LABEL_NUSES (bp->far_label)++;
4061 jump = emit_jump_insn_after (gen_return (), insn);
4062 /* Emit a barrier so that reorg knows that any following instructions
4063 are not reachable via a fall-through path.
4064 But don't do this when not optimizing, since we wouldn't suppress the
4065 alignment for the barrier then, and could end up with out-of-range
4066 pc-relative loads. */
4068 emit_barrier_after (jump);
4069 emit_label_after (bp->near_label, insn);
4070 JUMP_LABEL (jump) = bp->far_label;
4071 ok = invert_jump (insn, label, 1);
  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we would rather hide the
     delay slot) and it could cause branches to go out of range. */
4080 (gen_stuff_delay_slot
4081 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4082 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4084 /* Prevent reorg from undoing our splits. */
4085 gen_block_redirect (jump, bp->address += 2, 2);
4088 /* Fix up ADDR_DIFF_VECs. */
4090 fixup_addr_diff_vecs (rtx first)
4094 for (insn = first; insn; insn = NEXT_INSN (insn))
4096 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4098 if (GET_CODE (insn) != JUMP_INSN
4099 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4101 pat = PATTERN (insn);
4102 vec_lab = XEXP (XEXP (pat, 0), 0);
      /* Search for the matching casesi_jump_2. */
4105 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4107 if (GET_CODE (prev) != JUMP_INSN)
4109 prevpat = PATTERN (prev);
4110 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4112 x = XVECEXP (prevpat, 0, 1);
4113 if (GET_CODE (x) != USE)
4116 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	  /* FIXME: This is a bug in the optimizer, but it seems harmless
	     to just avoid panicking. */
4124 /* Emit the reference label of the braf where it belongs, right after
4125 the casesi_jump_2 (i.e. braf). */
4126 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4127 emit_label_after (braf_label, prev);
      /* Fix up the ADDR_DIFF_VEC to be relative
	 to the reference address of the braf. */
4131 XEXP (XEXP (pat, 0), 0) = braf_label;
4135 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4136 a barrier. Return the base 2 logarithm of the desired alignment. */
4138 barrier_align (rtx barrier_or_label)
4140 rtx next = next_real_insn (barrier_or_label), pat, prev;
4141 int slot, credit, jump_to_next = 0;
4146 pat = PATTERN (next);
4148 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4151 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4152 /* This is a barrier in front of a constant table. */
4155 prev = prev_real_insn (barrier_or_label);
4156 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4158 pat = PATTERN (prev);
4159 /* If this is a very small table, we want to keep the alignment after
4160 the table to the minimum for proper code alignment. */
4161 return ((TARGET_SMALLCODE
4162 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4163 <= (unsigned) 1 << (CACHE_LOG - 2)))
4164 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4167 if (TARGET_SMALLCODE)
4170 if (! TARGET_SH2 || ! optimize)
4171 return align_jumps_log;
4173 /* When fixing up pcloads, a constant table might be inserted just before
4174 the basic block that ends with the barrier. Thus, we can't trust the
4175 instruction lengths before that. */
4176 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
  /* Check if there is an immediately preceding branch to the insn beyond
     the barrier. We must weigh the cost of discarding useful information
     from the current cache line when executing this branch and there is
     an alignment, against that of fetching unneeded insns in front of the
     branch target when there is no alignment. */
4184 /* There are two delay_slot cases to consider. One is the simple case
4185 where the preceding branch is to the insn beyond the barrier (simple
4186 delay slot filling), and the other is where the preceding branch has
4187 a delay slot that is a duplicate of the insn after the barrier
4188 (fill_eager_delay_slots) and the branch is to the insn after the insn
4189 after the barrier. */
4191 /* PREV is presumed to be the JUMP_INSN for the barrier under
4192 investigation. Skip to the insn before it. */
4193 prev = prev_real_insn (prev);
4195 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4196 credit >= 0 && prev && GET_CODE (prev) == INSN;
4197 prev = prev_real_insn (prev))
4200 if (GET_CODE (PATTERN (prev)) == USE
4201 || GET_CODE (PATTERN (prev)) == CLOBBER)
4203 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4205 prev = XVECEXP (PATTERN (prev), 0, 1);
4206 if (INSN_UID (prev) == INSN_UID (next))
4208 /* Delay slot was filled with insn at jump target. */
4215 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4217 credit -= get_attr_length (prev);
4220 && GET_CODE (prev) == JUMP_INSN
4221 && JUMP_LABEL (prev))
4225 || next_real_insn (JUMP_LABEL (prev)) == next
4226 /* If relax_delay_slots() decides NEXT was redundant
4227 with some previous instruction, it will have
4228 redirected PREV's jump to the following insn. */
4229 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4230 /* There is no upper bound on redundant instructions
4231 that might have been skipped, but we must not put an
4232 alignment where none had been before. */
4233 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4235 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4236 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4237 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4239 rtx pat = PATTERN (prev);
4240 if (GET_CODE (pat) == PARALLEL)
4241 pat = XVECEXP (pat, 0, 0);
4242 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4248 return align_jumps_log;
4251 /* If we are inside a phony loop, almost any kind of label can turn up as the
4252 first one in the loop. Aligning a braf label causes incorrect switch
4253 destination addresses; we can detect braf labels because they are
4254 followed by a BARRIER.
4255 Applying loop alignment to small constant or switch tables is a waste
4256 of space, so we suppress this too. */
4258 sh_loop_align (rtx label)
  do
    next = next_nonnote_insn (next);
  while (next && GET_CODE (next) == CODE_LABEL);
4268 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4269 || recog_memoized (next) == CODE_FOR_consttable_2)
4272 return align_loops_log;
/* Do a final pass over the function, just before delayed branch
   scheduling. */
4281 rtx first, insn, mova = NULL_RTX;
4283 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4284 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4286 first = get_insns ();
4288 /* We must split call insns before introducing `mova's. If we're
4289 optimizing, they'll have already been split. Otherwise, make
4290 sure we don't split them too late. */
4292 split_all_insns_noflow ();
  /* If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call. It does no harm to not generate these
     pseudo-ops. However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry. */
4303 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4306 /* Remove all REG_LABEL notes. We want to use them for our own
4307 purposes. This works because none of the remaining passes
4308 need to look at them.
4310 ??? But it may break in the future. We should use a machine
4311 dependent REG_NOTE, or some other approach entirely. */
4312 for (insn = first; insn; insn = NEXT_INSN (insn))
4318 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4319 remove_note (insn, note);
4323 for (insn = first; insn; insn = NEXT_INSN (insn))
4325 rtx pattern, reg, link, set, scan, dies, label;
4326 int rescan = 0, foundinsn = 0;
4328 if (GET_CODE (insn) == CALL_INSN)
4330 pattern = PATTERN (insn);
4332 if (GET_CODE (pattern) == PARALLEL)
4333 pattern = XVECEXP (pattern, 0, 0);
4334 if (GET_CODE (pattern) == SET)
4335 pattern = SET_SRC (pattern);
4337 if (GET_CODE (pattern) != CALL
4338 || GET_CODE (XEXP (pattern, 0)) != MEM)
4341 reg = XEXP (XEXP (pattern, 0), 0);
4345 reg = sfunc_uses_reg (insn);
4350 if (GET_CODE (reg) != REG)
4353 /* This is a function call via REG. If the only uses of REG
4354 between the time that it is set and the time that it dies
4355 are in function calls, then we can associate all the
4356 function calls with the setting of REG. */
4358 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4360 if (REG_NOTE_KIND (link) != 0)
4362 set = single_set (XEXP (link, 0));
4363 if (set && rtx_equal_p (reg, SET_DEST (set)))
4365 link = XEXP (link, 0);
4372 /* ??? Sometimes global register allocation will have
4373 deleted the insn pointed to by LOG_LINKS. Try
4374 scanning backward to find where the register is set. */
4375 for (scan = PREV_INSN (insn);
4376 scan && GET_CODE (scan) != CODE_LABEL;
4377 scan = PREV_INSN (scan))
4379 if (! INSN_P (scan))
4382 if (! reg_mentioned_p (reg, scan))
4385 if (noncall_uses_reg (reg, scan, &set))
4399 /* The register is set at LINK. */
4401 /* We can only optimize the function call if the register is
4402 being set to a symbol. In theory, we could sometimes
4403 optimize calls to a constant location, but the assembler
4404 and linker do not support that at present. */
4405 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4406 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4409 /* Scan forward from LINK to the place where REG dies, and
4410 make sure that the only insns which use REG are
4411 themselves function calls. */
4413 /* ??? This doesn't work for call targets that were allocated
	 by reload, since there may not be a REG_DEAD note for the
	 register. */
4418 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4422 /* Don't try to trace forward past a CODE_LABEL if we haven't
4423 seen INSN yet. Ordinarily, we will only find the setting insn
4424 in LOG_LINKS if it is in the same basic block. However,
4425 cross-jumping can insert code labels in between the load and
4426 the call, and can result in situations where a single call
4427 insn may have two targets depending on where we came from. */
4429 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4432 if (! INSN_P (scan))
4435 /* Don't try to trace forward past a JUMP. To optimize
4436 safely, we would have to check that all the
4437 instructions at the jump destination did not use REG. */
4439 if (GET_CODE (scan) == JUMP_INSN)
4442 if (! reg_mentioned_p (reg, scan))
4445 if (noncall_uses_reg (reg, scan, &scanset))
4452 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4454 /* There is a function call to this register other
4455 than the one we are checking. If we optimize
4456 this call, we need to rescan again below. */
4460 /* ??? We shouldn't have to worry about SCANSET here.
4461 We should just be able to check for a REG_DEAD note
4462 on a function call. However, the REG_DEAD notes are
4463 apparently not dependable around libcalls; c-torture
4464 execute/920501-2 is a test case. If SCANSET is set,
4465 then this insn sets the register, so it must have
4466 died earlier. Unfortunately, this will only handle
		 the cases in which the register is, in fact, set in a
		 later insn. */
4470 /* ??? We shouldn't have to use FOUNDINSN here.
4471 However, the LOG_LINKS fields are apparently not
4472 entirely reliable around libcalls;
4473 newlib/libm/math/e_pow.c is a test case. Sometimes
4474 an insn will appear in LOG_LINKS even though it is
4475 not the most recent insn which sets the register. */
4479 || find_reg_note (scan, REG_DEAD, reg)))
4488 /* Either there was a branch, or some insn used REG
4489 other than as a function call address. */
4493 /* Create a code label, and put it in a REG_LABEL note on
4494 the insn which sets the register, and on each call insn
4495 which uses the register. In final_prescan_insn we look
4496 for the REG_LABEL notes, and output the appropriate label or pseudo-op.  */
4499 label = gen_label_rtx ();
4500 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4502 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4511 scan = NEXT_INSN (scan);
4513 && ((GET_CODE (scan) == CALL_INSN
4514 && reg_mentioned_p (reg, scan))
4515 || ((reg2 = sfunc_uses_reg (scan))
4516 && REGNO (reg2) == REGNO (reg))))
4518 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4520 while (scan != dies);
4526 fixup_addr_diff_vecs (first);
4530 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4531 shorten_branches (first);
4534 /* Scan the function looking for move instructions which have to be
4535 changed to pc-relative loads and insert the literal tables. */
4536 label_ref_list_pool = create_alloc_pool ("label references list",
4537 sizeof (struct label_ref_list_d),
4539 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4540 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4544 /* ??? basic block reordering can move a switch table dispatch
4545 below the switch table. Check if that has happened.
4546 We only have the addresses available when optimizing; but then,
4547 this check shouldn't be needed when not optimizing. */
4548 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4550 && (INSN_ADDRESSES (INSN_UID (insn))
4551 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4553 /* Change the mova into a load.
4554 broken_move will then return true for it. */
4557 else if (! num_mova++)
4560 else if (GET_CODE (insn) == JUMP_INSN
4561 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4569 /* Some code might have been inserted between the mova and
4570 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4571 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4572 total += get_attr_length (scan);
4574 /* The range of mova is 1020; add 4 because pc counts from the address of
4575 the second instruction after this one, and subtract 2 in case pc is
4576 2-byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
4577 cancels out with alignment effects of the mova itself.  */
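/* A worked instance of the bound (a sketch derived only from the
   comment above): 1020 bytes of mova range, plus 4 for the advanced
   pc, minus 2 for possible pc misalignment, gives 1020 + 4 - 2 = 1022,
   so once TOTAL exceeds 1022 the mova can no longer reach its
   ADDR_DIFF_VEC and must be turned into a pcload.  */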
4580 /* Change the mova into a load, and restart scanning
4581 there. broken_move will then return true for mova. */
4586 if (broken_move (insn)
4587 || (GET_CODE (insn) == INSN
4588 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4591 /* Scan ahead looking for a barrier to stick the constant table behind.  */
4593 rtx barrier = find_barrier (num_mova, mova, insn);
4594 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4595 int need_aligned_label = 0;
4597 if (num_mova && ! mova_p (mova))
4599 /* find_barrier had to change the first mova into a
4600 pcload; thus, we have to start with this new pcload. */
4604 /* Now find all the moves between the points and modify them. */
4605 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4607 if (GET_CODE (scan) == CODE_LABEL)
4609 if (GET_CODE (scan) == INSN
4610 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4611 need_aligned_label = 1;
4612 if (broken_move (scan))
4614 rtx *patp = &PATTERN (scan), pat = *patp;
4618 enum machine_mode mode;
4620 if (GET_CODE (pat) == PARALLEL)
4621 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4622 src = SET_SRC (pat);
4623 dst = SET_DEST (pat);
4624 mode = GET_MODE (dst);
4626 if (mode == SImode && hi_const (src)
4627 && REGNO (dst) != FPUL_REG)
4632 while (GET_CODE (dst) == SUBREG)
4634 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4635 GET_MODE (SUBREG_REG (dst)),
4638 dst = SUBREG_REG (dst);
4640 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4642 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4644 /* This must be an insn that clobbers r0. */
4645 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4646 XVECLEN (PATTERN (scan), 0)
4648 rtx clobber = *clobberp;
4650 gcc_assert (GET_CODE (clobber) == CLOBBER
4651 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4654 && reg_set_between_p (r0_rtx, last_float_move, scan))
4658 && GET_MODE_SIZE (mode) != 4
4659 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4661 lab = add_constant (src, mode, last_float);
4663 emit_insn_before (gen_mova (lab), scan);
4666 /* There will be a REG_UNUSED note for r0 on
4667 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4668 otherwise reorg:mark_target_live_regs will not
4669 consider r0 to be used, and we would end up with a delay
4670 slot insn in front of SCAN that clobbers r0.  */
4672 = find_regno_note (last_float_move, REG_UNUSED, 0);
4674 /* If we are not optimizing, then there may not be a note.  */
4677 if (note) PUT_MODE (note, REG_INC);
4679 *last_float_addr = r0_inc_rtx;
4681 last_float_move = scan;
4683 newsrc = gen_const_mem (mode,
4684 (((TARGET_SH4 && ! TARGET_FMOVD)
4685 || REGNO (dst) == FPUL_REG)
4688 last_float_addr = &XEXP (newsrc, 0);
4690 /* Remove the clobber of r0. */
4691 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4692 gen_rtx_SCRATCH (Pmode));
4694 /* This is a mova needing a label. Create it. */
4695 else if (GET_CODE (src) == UNSPEC
4696 && XINT (src, 1) == UNSPEC_MOVA
4697 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4699 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4700 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4701 newsrc = gen_rtx_UNSPEC (SImode,
4702 gen_rtvec (1, newsrc),
4707 lab = add_constant (src, mode, 0);
4708 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4709 newsrc = gen_const_mem (mode, newsrc);
4711 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4712 INSN_CODE (scan) = -1;
4715 dump_table (need_aligned_label ? insn : 0, barrier);
4719 free_alloc_pool (label_ref_list_pool);
4721 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4722 INSN_ADDRESSES_FREE ();
4723 split_branches (first);
4725 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4726 also has an effect on the register that holds the address of the sfunc.
4727 Insert an extra dummy insn in front of each sfunc that pretends to
4728 use this register. */
4729 if (flag_delayed_branch)
4731 for (insn = first; insn; insn = NEXT_INSN (insn))
4733 rtx reg = sfunc_uses_reg (insn);
4737 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4741 /* fpscr is not actually a user variable, but we pretend it is for the
4742 sake of the previous optimization passes, since we want it handled like
4743 one. However, we don't have any debugging information for it, so turn
4744 it into a non-user variable now. */
4746 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4748 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4752 get_dest_uid (rtx label, int max_uid)
4754 rtx dest = next_real_insn (label);
4757 /* This can happen for an undefined label. */
4759 dest_uid = INSN_UID (dest);
4760 /* If this is a newly created branch redirection blocking instruction,
4761 we cannot index the branch_uid or insn_addresses arrays with its
4762 uid. But then, we won't need to, because the actual destination is
4763 the following branch. */
4764 while (dest_uid >= max_uid)
4766 dest = NEXT_INSN (dest);
4767 dest_uid = INSN_UID (dest);
4769 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4774 /* Split condbranches that are out of range. Also add clobbers for
4775 scratch registers that are needed in far jumps.
4776 We do this before delay slot scheduling, so that it can take our
4777 newly created instructions into account. It also allows us to
4778 find branches with common targets more easily. */
4781 split_branches (rtx first)
4784 struct far_branch **uid_branch, *far_branch_list = 0;
4785 int max_uid = get_max_uid ();
4788 /* Find out which branches are out of range. */
4789 shorten_branches (first);
4791 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4792 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4794 for (insn = first; insn; insn = NEXT_INSN (insn))
4795 if (! INSN_P (insn))
4797 else if (INSN_DELETED_P (insn))
4799 /* Shorten_branches would split this instruction again,
4800 so transform it into a note. */
4801 PUT_CODE (insn, NOTE);
4802 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4803 NOTE_SOURCE_FILE (insn) = 0;
4805 else if (GET_CODE (insn) == JUMP_INSN
4806 /* Don't mess with ADDR_DIFF_VEC */
4807 && (GET_CODE (PATTERN (insn)) == SET
4808 || GET_CODE (PATTERN (insn)) == RETURN))
4810 enum attr_type type = get_attr_type (insn);
4811 if (type == TYPE_CBRANCH)
4815 if (get_attr_length (insn) > 4)
4817 rtx src = SET_SRC (PATTERN (insn));
4818 rtx olabel = XEXP (XEXP (src, 1), 0);
4819 int addr = INSN_ADDRESSES (INSN_UID (insn));
4821 int dest_uid = get_dest_uid (olabel, max_uid);
4822 struct far_branch *bp = uid_branch[dest_uid];
4824 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4825 the label if the LABEL_NUSES count drops to zero. There is
4826 always a jump_optimize pass that sets these values, but it
4827 proceeds to delete unreferenced code, and then if not
4828 optimizing, to un-delete the deleted instructions, thus
4829 leaving labels with use counts that are too low.  */
4832 JUMP_LABEL (insn) = olabel;
4833 LABEL_NUSES (olabel)++;
4837 bp = (struct far_branch *) alloca (sizeof *bp);
4838 uid_branch[dest_uid] = bp;
4839 bp->prev = far_branch_list;
4840 far_branch_list = bp;
4842 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4843 LABEL_NUSES (bp->far_label)++;
4847 label = bp->near_label;
4848 if (! label && bp->address - addr >= CONDJUMP_MIN)
4850 rtx block = bp->insert_place;
4852 if (GET_CODE (PATTERN (block)) == RETURN)
4853 block = PREV_INSN (block);
4855 block = gen_block_redirect (block,
4857 label = emit_label_after (gen_label_rtx (),
4859 bp->near_label = label;
4861 else if (label && ! NEXT_INSN (label))
4863 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4864 bp->insert_place = insn;
4866 gen_far_branch (bp);
4870 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4872 bp->near_label = label = gen_label_rtx ();
4873 bp->insert_place = insn;
4876 ok = redirect_jump (insn, label, 1);
4881 /* get_attr_length (insn) == 2 */
4882 /* Check if we have a pattern where reorg wants to redirect
4883 the branch to a label from an unconditional branch that is too far away.  */
4885 /* We can't use JUMP_LABEL here because it might be undefined
4886 when not optimizing. */
4887 /* A syntax error might cause beyond to be NULL_RTX. */
4889 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4893 && (GET_CODE (beyond) == JUMP_INSN
4894 || ((beyond = next_active_insn (beyond))
4895 && GET_CODE (beyond) == JUMP_INSN))
4896 && GET_CODE (PATTERN (beyond)) == SET
4897 && recog_memoized (beyond) == CODE_FOR_jump_compact
4899 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4900 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4902 gen_block_redirect (beyond,
4903 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4906 next = next_active_insn (insn);
4908 if ((GET_CODE (next) == JUMP_INSN
4909 || ((next = next_active_insn (next))
4910 && GET_CODE (next) == JUMP_INSN))
4911 && GET_CODE (PATTERN (next)) == SET
4912 && recog_memoized (next) == CODE_FOR_jump_compact
4914 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4915 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4917 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4919 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4921 int addr = INSN_ADDRESSES (INSN_UID (insn));
4924 struct far_branch *bp;
4926 if (type == TYPE_JUMP)
4928 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4929 dest_uid = get_dest_uid (far_label, max_uid);
4932 /* Parse errors can lead to labels outside the insn stream.  */
4934 if (! NEXT_INSN (far_label))
4939 JUMP_LABEL (insn) = far_label;
4940 LABEL_NUSES (far_label)++;
4942 redirect_jump (insn, NULL_RTX, 1);
4946 bp = uid_branch[dest_uid];
4949 bp = (struct far_branch *) alloca (sizeof *bp);
4950 uid_branch[dest_uid] = bp;
4951 bp->prev = far_branch_list;
4952 far_branch_list = bp;
4954 bp->far_label = far_label;
4956 LABEL_NUSES (far_label)++;
4958 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4959 if (addr - bp->address <= CONDJUMP_MAX)
4960 emit_label_after (bp->near_label, PREV_INSN (insn));
4963 gen_far_branch (bp);
4969 bp->insert_place = insn;
4971 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4973 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4976 /* Generate all pending far branches,
4977 and free our references to the far labels. */
4978 while (far_branch_list)
4980 if (far_branch_list->near_label
4981 && ! NEXT_INSN (far_branch_list->near_label))
4982 gen_far_branch (far_branch_list);
4984 && far_branch_list->far_label
4985 && ! --LABEL_NUSES (far_branch_list->far_label))
4986 delete_insn (far_branch_list->far_label);
4987 far_branch_list = far_branch_list->prev;
4990 /* Instruction length information is no longer valid due to the new
4991 instructions that have been generated. */
4992 init_insn_lengths ();
4995 /* Dump out instruction addresses, which is useful for debugging the
4996 constant pool table stuff.
4998 If relaxing, output the label and pseudo-ops used to link together
4999 calls and the instruction which set the registers. */
5001 /* ??? The addresses printed by this routine for insns are nonsense for
5002 insns which are inside of a sequence where none of the inner insns have
5003 variable length. This is because the second pass of shorten_branches
5004 does not bother to update them. */
5007 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5008 int noperands ATTRIBUTE_UNUSED)
5010 if (TARGET_DUMPISIZE)
5011 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5017 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5022 pattern = PATTERN (insn);
5023 if (GET_CODE (pattern) == PARALLEL)
5024 pattern = XVECEXP (pattern, 0, 0);
5025 switch (GET_CODE (pattern))
5028 if (GET_CODE (SET_SRC (pattern)) != CALL
5029 && get_attr_type (insn) != TYPE_SFUNC)
5031 targetm.asm_out.internal_label
5032 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5035 /* else FALLTHROUGH */
5037 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5038 CODE_LABEL_NUMBER (XEXP (note, 0)));
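/* A hedged sketch of the intended output (register and label numbers
   are made up, and the local-label prefix is target-dependent): the
   insn that loads the call target is preceded by an ordinary internal
   label such as
	L5:
   while each call through that register is preceded by
	.uses	L5
   so the linker's relaxation pass can pair the load with its calls.  */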
5048 /* Dump out any constants accumulated in the final pass.  These will only be labels.  */
5052 output_jump_label_table (void)
5058 fprintf (asm_out_file, "\t.align 2\n");
5059 for (i = 0; i < pool_size; i++)
5061 pool_node *p = &pool_vector[i];
5063 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5064 CODE_LABEL_NUMBER (p->label));
5065 output_asm_insn (".long %O0", &p->value);
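/* Each pool entry thus comes out roughly as (an illustrative sketch,
   not actual compiler output):
	.align	2
   L7:
	.long	L42
   i.e. a 4-byte-aligned internal label followed by the 32-bit value.  */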
5073 /* A full frame looks like:
5077 [ if current_function_anonymous_args
5090 local-0 <- fp points here. */
5092 /* Number of bytes pushed for anonymous args, used to pass information
5093 between expand_prologue and expand_epilogue. */
5095 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5096 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5097 for an epilogue and a negative value means that it's for a sibcall
5098 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5099 all the registers that are about to be restored, and hence dead. */
5102 output_stack_adjust (int size, rtx reg, int epilogue_p,
5103 HARD_REG_SET *live_regs_mask)
5105 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5108 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5110 /* This test is bogus, as output_stack_adjust is used to re-align the stack.  */
5113 gcc_assert (!(size % align));
5116 if (CONST_OK_FOR_ADD (size))
5117 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5118 /* Try to do it with two partial adjustments; however, we must make
5119 sure that the stack is properly aligned at all times, in case
5120 an interrupt occurs between the two partial adjustments. */
5121 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5122 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5124 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5125 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
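/* Worked example with illustrative numbers: for size = 200 and
   align = 4, CONST_OK_FOR_ADD rejects 200 (it exceeds a signed 8-bit
   immediate on non-SHmedia), but size / 2 & -align = 100 and the
   remainder 200 - 100 = 100 both fit, and after either partial add
   the stack is still 4-byte aligned.  */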
5131 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5134 /* If TEMP is invalid, we could temporarily save a general
5135 register to MACL. However, there is currently no need
5136 to handle this case, so just die when we see it. */
5138 || current_function_interrupt
5139 || ! call_really_used_regs[temp] || fixed_regs[temp])
5141 if (temp < 0 && ! current_function_interrupt
5142 && (TARGET_SHMEDIA || epilogue_p >= 0))
5145 COPY_HARD_REG_SET (temps, call_used_reg_set);
5146 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5150 if (current_function_return_rtx)
5152 enum machine_mode mode;
5153 mode = GET_MODE (current_function_return_rtx);
5154 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5155 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5157 for (i = 0; i < nreg; i++)
5158 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5159 if (current_function_calls_eh_return)
5161 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5162 for (i = 0; i <= 3; i++)
5163 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5166 if (TARGET_SHMEDIA && epilogue_p < 0)
5167 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5168 CLEAR_HARD_REG_BIT (temps, i);
5169 if (epilogue_p <= 0)
5171 for (i = FIRST_PARM_REG;
5172 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5173 CLEAR_HARD_REG_BIT (temps, i);
5174 if (cfun->static_chain_decl != NULL)
5175 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5177 temp = scavenge_reg (&temps);
5179 if (temp < 0 && live_regs_mask)
5180 temp = scavenge_reg (live_regs_mask);
5183 rtx adj_reg, tmp_reg, mem;
5185 /* If we reached here, the most likely case is the (sibcall)
5186 epilogue for non-SHmedia.  Put a special push/pop sequence
5187 for such a case as a last resort.  This looks lengthy but
5188 is not a problem because it seems to be very rare.  */
5191 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5194 /* ??? There is still the slight possibility that r4 or
5195 r5 have been reserved as fixed registers or assigned
5196 as global registers, and they change during an
5197 interrupt. There are possible ways to handle this:
5199 - If we are adjusting the frame pointer (r14), we can do
5200 with a single temp register and an ordinary push / pop
5202 - Grab any call-used or call-saved registers (i.e. not
5203 fixed or globals) for the temps we need. We might
5204 also grab r14 if we are adjusting the stack pointer.
5205 If we can't find enough available registers, issue
5206 a diagnostic and die - the user must have reserved
5207 way too many registers.
5208 But since all this is rather unlikely to happen and
5209 would require extra testing, we just die if r4 / r5
5210 are not available. */
5211 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5212 && !global_regs[4] && !global_regs[5]);
5214 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5215 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5216 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5217 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5218 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5219 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5220 emit_move_insn (mem, tmp_reg);
5221 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5222 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5223 emit_move_insn (mem, tmp_reg);
5224 emit_move_insn (reg, adj_reg);
5225 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5226 emit_move_insn (adj_reg, mem);
5227 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5228 emit_move_insn (tmp_reg, mem);
5231 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5233 /* If SIZE is negative, subtract the positive value.
5234 This sometimes allows a constant pool entry to be shared
5235 between prologue and epilogue code. */
5238 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5239 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5243 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5244 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5248 = (gen_rtx_EXPR_LIST
5249 (REG_FRAME_RELATED_EXPR,
5250 gen_rtx_SET (VOIDmode, reg,
5251 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5261 RTX_FRAME_RELATED_P (x) = 1;
5265 /* Output RTL to push register RN onto the stack. */
5272 x = gen_push_fpul ();
5273 else if (rn == FPSCR_REG)
5274 x = gen_push_fpscr ();
5275 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5276 && FP_OR_XD_REGISTER_P (rn))
5278 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5280 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5282 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5283 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5285 x = gen_push (gen_rtx_REG (SImode, rn));
5289 = gen_rtx_EXPR_LIST (REG_INC,
5290 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5294 /* Output RTL to pop register RN from the stack. */
5301 x = gen_pop_fpul ();
5302 else if (rn == FPSCR_REG)
5303 x = gen_pop_fpscr ();
5304 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5305 && FP_OR_XD_REGISTER_P (rn))
5307 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5309 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5311 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5312 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5314 x = gen_pop (gen_rtx_REG (SImode, rn));
5318 = gen_rtx_EXPR_LIST (REG_INC,
5319 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5322 /* Generate code to push the regs specified in the mask. */
5325 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5330 /* Push PR last; this gives better latencies after the prologue, and
5331 candidates for the return delay slot when there are no general
5332 registers pushed. */
5333 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5335 /* If this is an interrupt handler, and the SZ bit varies,
5336 and we have to push any floating point register, we need
5337 to switch to the correct precision first. */
5338 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5339 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5341 HARD_REG_SET unsaved;
5344 COMPL_HARD_REG_SET (unsaved, *mask);
5345 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5349 && (i != FPSCR_REG || ! skip_fpscr)
5350 && TEST_HARD_REG_BIT (*mask, i))
5353 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5357 /* Calculate how much extra space is needed to save all callee-saved target registers.
5359 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5362 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5365 int stack_space = 0;
5366 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5368 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5369 if ((! call_really_used_regs[reg] || interrupt_handler)
5370 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5371 /* Leave space to save this target register on the stack,
5372 in case target register allocation wants to use it. */
5373 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
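/* As a worked instance (assuming the natural mode of a target
   register is DImode, the mode used for their saves elsewhere in
   this file): with all eight target registers eligible, this
   reserves 8 * 8 = 64 bytes.  */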
5377 /* Decide whether we should reserve space for callee-save target registers,
5378 in case target register allocation wants to use them. REGS_SAVED is
5379 the space, in bytes, that is already required for register saves.
5380 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5383 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5384 HARD_REG_SET *live_regs_mask)
5388 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5391 /* Decide how much space to reserve for callee-save target registers
5392 in case target register allocation wants to use them.
5393 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5396 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5398 if (shmedia_space_reserved_for_target_registers)
5399 return shmedia_target_regs_stack_space (live_regs_mask);
5404 /* Work out the registers which need to be saved, both as a mask and a
5405 count of saved words. Return the count.
5407 If doing a pragma interrupt function, then push all regs used by the
5408 function, and if we call another function (we can tell by looking at PR),
5409 make sure that all the regs it clobbers are safe too. */
5412 calc_live_regs (HARD_REG_SET *live_regs_mask)
5417 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5418 bool nosave_low_regs;
5419 int pr_live, has_call;
5421 attrs = DECL_ATTRIBUTES (current_function_decl);
5422 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5423 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5424 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5425 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5427 CLEAR_HARD_REG_SET (*live_regs_mask);
5428 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5429 && regs_ever_live[FPSCR_REG])
5430 target_flags &= ~MASK_FPU_SINGLE;
5431 /* If we can eliminate a lot of saves by switching to double mode, do that.  */
5432 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5433 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5434 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5435 && (! call_really_used_regs[reg]
5436 || interrupt_handler)
5439 target_flags &= ~MASK_FPU_SINGLE;
5442 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5443 knows how to use it. That means the pseudo originally allocated for
5444 the initial value can become the PR_MEDIA_REG hard register, as seen for
5445 execute/20010122-1.c:test9. */
5447 /* ??? this function is called from initial_elimination_offset, hence we
5448 can't use the result of sh_media_register_for_return here. */
5449 pr_live = sh_pr_n_sets ();
5452 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5453 pr_live = (pr_initial
5454 ? (GET_CODE (pr_initial) != REG
5455 || REGNO (pr_initial) != (PR_REG))
5456 : regs_ever_live[PR_REG]);
5457 /* For SHcompact, if not optimizing, we end up with a memory reference
5458 using the return address pointer for __builtin_return_address even
5459 though there is no actual need to put the PR register on the stack. */
5460 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5462 /* Force PR to be live if the prologue has to call the SHmedia
5463 argument decoder or register saver. */
5464 if (TARGET_SHCOMPACT
5465 && ((current_function_args_info.call_cookie
5466 & ~ CALL_COOKIE_RET_TRAMP (1))
5467 || current_function_has_nonlocal_label))
5469 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5470 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5472 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5475 ? (/* Need to save all the regs ever live. */
5476 (regs_ever_live[reg]
5477 || (call_really_used_regs[reg]
5478 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5479 || reg == PIC_OFFSET_TABLE_REGNUM)
5481 || (TARGET_SHMEDIA && has_call
5482 && REGISTER_NATURAL_MODE (reg) == SImode
5483 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5484 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5485 && reg != RETURN_ADDRESS_POINTER_REGNUM
5486 && reg != T_REG && reg != GBR_REG
5487 /* Push fpscr only on targets which have an FPU.  */
5488 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5489 : (/* Only push those regs which are used and need to be saved. */
5492 && current_function_args_info.call_cookie
5493 && reg == PIC_OFFSET_TABLE_REGNUM)
5494 || (regs_ever_live[reg]
5495 && (!call_really_used_regs[reg]
5496 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5497 || (current_function_calls_eh_return
5498 && (reg == EH_RETURN_DATA_REGNO (0)
5499 || reg == EH_RETURN_DATA_REGNO (1)
5500 || reg == EH_RETURN_DATA_REGNO (2)
5501 || reg == EH_RETURN_DATA_REGNO (3)))
5502 || ((reg == MACL_REG || reg == MACH_REG)
5503 && regs_ever_live[reg]
5504 && sh_cfun_attr_renesas_p ())
5507 SET_HARD_REG_BIT (*live_regs_mask, reg);
5508 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5510 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5511 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5513 if (FP_REGISTER_P (reg))
5515 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5517 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5518 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5521 else if (XD_REGISTER_P (reg))
5523 /* Must switch to double mode to access these registers. */
5524 target_flags &= ~MASK_FPU_SINGLE;
5528 if (nosave_low_regs && reg == R8_REG)
5531 /* If we have a target register optimization pass after prologue / epilogue
5532 threading, we need to assume all target registers will be live even if they aren't now.  */
5534 if (flag_branch_target_load_optimize2
5535 && TARGET_SAVE_ALL_TARGET_REGS
5536 && shmedia_space_reserved_for_target_registers)
5537 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5538 if ((! call_really_used_regs[reg] || interrupt_handler)
5539 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5541 SET_HARD_REG_BIT (*live_regs_mask, reg);
5542 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5544 /* If this is an interrupt handler, we don't have any call-clobbered
5545 registers we can conveniently use for target register save/restore.
5546 Make sure we save at least one general purpose register when we need
5547 to save target registers. */
5548 if (interrupt_handler
5549 && hard_regs_intersect_p (live_regs_mask,
5550 &reg_class_contents[TARGET_REGS])
5551 && ! hard_regs_intersect_p (live_regs_mask,
5552 &reg_class_contents[GENERAL_REGS]))
5554 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5555 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5561 /* Code to generate prologue and epilogue sequences */
5563 /* PUSHED is the number of bytes that are being pushed on the
5564 stack for register saves. Return the frame size, padded
5565 appropriately so that the stack stays properly aligned. */
5566 static HOST_WIDE_INT
5567 rounded_frame_size (int pushed)
5569 HOST_WIDE_INT size = get_frame_size ();
5570 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5572 return ((size + pushed + align - 1) & -align) - pushed;
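/* Worked example with illustrative values: get_frame_size () == 10,
   PUSHED == 20 and align == 8 give ((10 + 20 + 7) & -8) - 20 == 12,
   so the 20 pushed bytes plus the 12 returned here total 32, a
   multiple of the stack boundary.  */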
5575 /* Choose a call-clobbered target-branch register that remains
5576 unchanged along the whole function. We set it up as the return
5577 value in the prologue. */
5579 sh_media_register_for_return (void)
5584 if (! current_function_is_leaf)
5586 if (lookup_attribute ("interrupt_handler",
5587 DECL_ATTRIBUTES (current_function_decl)))
5589 if (sh_cfun_interrupt_handler_p ())
5592 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5594 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5595 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5601 /* The maximum registers we need to save are:
5602 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5603 - 32 floating point registers (for each pair, we save none,
5604 one single precision value, or a double precision value).
5605 - 8 target registers
5606 - add 1 entry for a delimiter. */
5607 #define MAX_SAVED_REGS (62+32+8+1)
5609 typedef struct save_entry_s
5618 /* There will be a delimiter entry with VOIDmode both at the start and the
5619 end of a filled in schedule. The end delimiter has the offset of the
5620 save with the smallest (i.e. most negative) offset. */
5621 typedef struct save_schedule_s
5623 save_entry entries[MAX_SAVED_REGS + 2];
5624 int temps[MAX_TEMPS+1];
5627 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5628 use reverse order. Returns the last entry written to (not counting
5629 the delimiter).  OFFSET_BASE is a number to be added to all offset entries.  */
5633 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5637 save_entry *entry = schedule->entries;
5641 if (! current_function_interrupt)
5642 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5643 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5644 && ! FUNCTION_ARG_REGNO_P (i)
5645 && i != FIRST_RET_REG
5646 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5647 && ! (current_function_calls_eh_return
5648 && (i == EH_RETURN_STACKADJ_REGNO
5649 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5650 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5651 schedule->temps[tmpx++] = i;
5653 entry->mode = VOIDmode;
5654 entry->offset = offset_base;
5656 /* We loop twice: first, we save 8-byte aligned registers in the
5657 higher addresses, that are known to be aligned. Then, we
5658 proceed to saving 32-bit registers that don't need 8-byte alignment.
5660 If this is an interrupt function, all registers that need saving
5661 need to be saved in full.  Moreover, we need to postpone saving
5662 target registers till we have saved some general purpose registers
5663 we can then use as scratch registers. */
5664 offset = offset_base;
5665 for (align = 1; align >= 0; align--)
5667 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5668 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5670 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5673 if (current_function_interrupt)
5675 if (TARGET_REGISTER_P (i))
5677 if (GENERAL_REGISTER_P (i))
5680 if (mode == SFmode && (i % 2) == 1
5681 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5682 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5689 /* If we're doing the aligned pass and this is not aligned,
5690 or we're doing the unaligned pass and this is aligned, skip it.  */
5692 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5696 if (current_function_interrupt
5697 && GENERAL_REGISTER_P (i)
5698 && tmpx < MAX_TEMPS)
5699 schedule->temps[tmpx++] = i;
5701 offset -= GET_MODE_SIZE (mode);
5704 entry->offset = offset;
5707 if (align && current_function_interrupt)
5708 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5709 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5711 offset -= GET_MODE_SIZE (DImode);
5713 entry->mode = DImode;
5714 entry->offset = offset;
5719 entry->mode = VOIDmode;
5720 entry->offset = offset;
5721 schedule->temps[tmpx] = -1;
5726 sh_expand_prologue (void)
5728 HARD_REG_SET live_regs_mask;
5731 int save_flags = target_flags;
5734 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5736 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5738 /* We have pretend args if we had an object sent partially in registers
5739 and partially on the stack, e.g. a large structure. */
5740 pretend_args = current_function_pretend_args_size;
5741 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5742 && (NPARM_REGS(SImode)
5743 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5745 output_stack_adjust (-pretend_args
5746 - current_function_args_info.stack_regs * 8,
5747 stack_pointer_rtx, 0, NULL);
5749 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5750 /* We're going to use the PIC register to load the address of the
5751 incoming-argument decoder and/or of the return trampoline from
5752 the GOT, so make sure the PIC register is preserved and initialized.  */
5754 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5756 if (TARGET_SHCOMPACT
5757 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5761 /* First, make all registers with incoming arguments that will
5762 be pushed onto the stack live, so that register renaming
5763 doesn't overwrite them. */
5764 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5765 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5766 >= NPARM_REGS (SImode) - reg)
5767 for (; reg < NPARM_REGS (SImode); reg++)
5768 emit_insn (gen_shcompact_preserve_incoming_args
5769 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5770 else if (CALL_COOKIE_INT_REG_GET
5771 (current_function_args_info.call_cookie, reg) == 1)
5772 emit_insn (gen_shcompact_preserve_incoming_args
5773 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5775 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5777 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5778 GEN_INT (current_function_args_info.call_cookie));
5779 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5780 gen_rtx_REG (SImode, R0_REG));
5782 else if (TARGET_SHMEDIA)
5784 int tr = sh_media_register_for_return ();
5788 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5789 gen_rtx_REG (DImode, PR_MEDIA_REG));
5791 /* ??? We should suppress saving pr when we don't need it, but this
5792 is tricky because of builtin_return_address. */
5794 /* If this function only exits with sibcalls, this copy
5795 will be flagged as dead. */
5796 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5802 /* Emit the code for SETUP_VARARGS. */
5803 if (current_function_stdarg)
5805 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5807 /* Push arg regs as if they'd been provided by the caller on the stack.  */
5808 for (i = 0; i < NPARM_REGS(SImode); i++)
5810 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5813 if (i >= (NPARM_REGS(SImode)
5814 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5818 RTX_FRAME_RELATED_P (insn) = 0;
5823 /* If we're supposed to switch stacks at function entry, do so now. */
5826 /* The argument specifies a variable holding the address of the
5827 stack the interrupt function should switch to/from at entry/exit. */
5829 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5830 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5832 emit_insn (gen_sp_switch_1 (sp_switch));
5835 d = calc_live_regs (&live_regs_mask);
5836 /* ??? Maybe we could save some switching if we can move a mode switch
5837 that already happens to be at the function start into the prologue. */
5838 if (target_flags != save_flags && ! current_function_interrupt)
5839 emit_insn (gen_toggle_sz ());
5843 int offset_base, offset;
5845 int offset_in_r0 = -1;
5847 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5848 int total_size, save_size;
5849 save_schedule schedule;
5853 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5854 && ! current_function_interrupt)
5855 r0 = gen_rtx_REG (Pmode, R0_REG);
5857 /* D is the actual number of bytes that we need for saving registers;
5858 however, in initial_elimination_offset we have committed to using
5859 an additional TREGS_SPACE amount of bytes - in order to keep both
5860 addresses to arguments supplied by the caller and local variables
5861 valid, we must keep this gap. Place it between the incoming
5862 arguments and the actually saved registers in a bid to optimize
5863 locality of reference. */
5864 total_size = d + tregs_space;
5865 total_size += rounded_frame_size (total_size);
5866 save_size = total_size - rounded_frame_size (d);
5867 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5868 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5869 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5871 /* If adjusting the stack in a single step costs nothing extra, do so.
5872 I.e. either if a single addi is enough, or we need a movi anyway,
5873 and we don't exceed the maximum offset range (the test for the
5874 latter is conservative for simplicity). */
5876 && (CONST_OK_FOR_I10 (-total_size)
5877 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5878 && total_size <= 2044)))
5879 d_rounding = total_size - save_size;
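/* A sketch of the heuristic (assuming CONST_OK_FOR_I10 accepts
   signed 10-bit immediates): when -total_size fits in [-512, 511],
   a single addi covers the whole adjustment; otherwise, if even the
   partial adjustment would need a movi, merging the two steps costs
   nothing extra, as long as total_size stays within the conservative
   2044-byte limit used above.  */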
5881 offset_base = d + d_rounding;
5883 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5886 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5887 tmp_pnt = schedule.temps;
5888 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5890 enum machine_mode mode = entry->mode;
5891 unsigned int reg = entry->reg;
5892 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5895 offset = entry->offset;
5897 reg_rtx = gen_rtx_REG (mode, reg);
5899 mem_rtx = gen_frame_mem (mode,
5900 gen_rtx_PLUS (Pmode,
5904 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5911 if (HAVE_PRE_DECREMENT
5912 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5913 || mem_rtx == NULL_RTX
5914 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5916 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5918 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5927 offset += GET_MODE_SIZE (mode);
5931 if (mem_rtx != NULL_RTX)
5934 if (offset_in_r0 == -1)
5936 emit_move_insn (r0, GEN_INT (offset));
5937 offset_in_r0 = offset;
5939 else if (offset != offset_in_r0)
5944 GEN_INT (offset - offset_in_r0)));
5945 offset_in_r0 += offset - offset_in_r0;
5948 if (pre_dec != NULL_RTX)
5954 (Pmode, r0, stack_pointer_rtx));
5958 offset -= GET_MODE_SIZE (mode);
5959 offset_in_r0 -= GET_MODE_SIZE (mode);
5964 mem_rtx = gen_frame_mem (mode, r0);
5966 mem_rtx = gen_frame_mem (mode,
5967 gen_rtx_PLUS (Pmode,
5971 /* We must not use an r0-based address for target-branch
5972 registers or for special registers without pre-dec
5973 memory addresses, since we store their values in r0 first.  */
5975 gcc_assert (!TARGET_REGISTER_P (reg)
5976 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5977 || mem_rtx == pre_dec));
5980 orig_reg_rtx = reg_rtx;
5981 if (TARGET_REGISTER_P (reg)
5982 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5983 && mem_rtx != pre_dec))
5985 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5987 emit_move_insn (tmp_reg, reg_rtx);
5989 if (REGNO (tmp_reg) == R0_REG)
5993 gcc_assert (!refers_to_regno_p
5994 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5997 if (*++tmp_pnt <= 0)
5998 tmp_pnt = schedule.temps;
6005 /* Mark as interesting for the dwarf cfi generator.  */
6006 insn = emit_move_insn (mem_rtx, reg_rtx);
6007 RTX_FRAME_RELATED_P (insn) = 1;
6008 /* If we use an intermediate register for the save, we can't
6009 describe this exactly in cfi as a copy of the to-be-saved
6010 register into the temporary register and then the temporary
6011 register on the stack, because the temporary register can
6012 have a different natural size than the to-be-saved register.
6013 Thus, we gloss over the intermediate copy and pretend we do
6014 a direct save from the to-be-saved register. */
6015 if (REGNO (reg_rtx) != reg)
6019 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6020 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6022 REG_NOTES (insn) = note_rtx;
6025 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6027 rtx reg_rtx = gen_rtx_REG (mode, reg);
6029 rtx mem_rtx = gen_frame_mem (mode,
6030 gen_rtx_PLUS (Pmode,
6034 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6035 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6037 REG_NOTES (insn) = note_rtx;
6042 gcc_assert (entry->offset == d_rounding);
6045 push_regs (&live_regs_mask, current_function_interrupt);
6047 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6049 rtx insn = get_last_insn ();
6050 rtx last = emit_insn (gen_GOTaddr2picreg ());
6052 /* Mark these insns as possibly dead. Sometimes, flow2 may
6053 delete all uses of the PIC register. In this case, let it
6054 delete the initialization too. */
6057 insn = NEXT_INSN (insn);
6059 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6063 while (insn != last);
6066 if (SHMEDIA_REGS_STACK_ADJUST ())
6068 /* This must NOT go through the PLT, otherwise mach and macl
6069 may be clobbered. */
6070 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6072 ? "__GCC_push_shmedia_regs"
6073 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6074 emit_insn (gen_shmedia_save_restore_regs_compact
6075 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6078 if (target_flags != save_flags && ! current_function_interrupt)
6080 rtx insn = emit_insn (gen_toggle_sz ());
6082 /* If we're lucky, a mode switch in the function body will
6083 overwrite fpscr, turning this insn dead. Tell flow this
6084 insn is ok to delete. */
6085 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6090 target_flags = save_flags;
6092 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6093 stack_pointer_rtx, 0, NULL);
6095 if (frame_pointer_needed)
6096 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6098 if (TARGET_SHCOMPACT
6099 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6101 /* This must NOT go through the PLT, otherwise mach and macl
6102 may be clobbered. */
6103 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6104 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6105 emit_insn (gen_shcompact_incoming_args ());
6110 sh_expand_epilogue (bool sibcall_p)
6112 HARD_REG_SET live_regs_mask;
6116 int save_flags = target_flags;
6117 int frame_size, save_size;
6118 int fpscr_deferred = 0;
6119 int e = sibcall_p ? -1 : 1;
6121 d = calc_live_regs (&live_regs_mask);
6124 frame_size = rounded_frame_size (d);
6128 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6130 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6131 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6132 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6134 total_size = d + tregs_space;
6135 total_size += rounded_frame_size (total_size);
6136 save_size = total_size - frame_size;
6138 /* If adjusting the stack in a single step costs nothing extra, do so.
6139 I.e. either if a single addi is enough, or we need a movi anyway,
6140 and we don't exceed the maximum offset range (the test for the
6141 latter is conservative for simplicity). */
6143 && ! frame_pointer_needed
6144 && (CONST_OK_FOR_I10 (total_size)
6145 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6146 && total_size <= 2044)))
6147 d_rounding = frame_size;
6149 frame_size -= d_rounding;
6152 if (frame_pointer_needed)
6154 /* We must avoid scheduling the epilogue with previous basic blocks
6155 when exception handling is enabled. See PR/18032. */
6156 if (flag_exceptions)
6157 emit_insn (gen_blockage ());
6158 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6161 /* We must avoid moving the stack pointer adjustment past code
6162 which reads from the local frame, else an interrupt could
6163 occur after the SP adjustment and clobber data in the local frame.  */
6165 emit_insn (gen_blockage ());
6166 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6168 else if (frame_size)
6170 /* We must avoid moving the stack pointer adjustment past code
6171 which reads from the local frame, else an interrupt could
6172 occur after the SP adjustment and clobber data in the local frame.  */
6174 emit_insn (gen_blockage ());
6175 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6178 if (SHMEDIA_REGS_STACK_ADJUST ())
6180 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6182 ? "__GCC_pop_shmedia_regs"
6183 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6184 /* This must NOT go through the PLT, otherwise mach and macl
6185 may be clobbered. */
6186 emit_insn (gen_shmedia_save_restore_regs_compact
6187 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6190 /* Pop all the registers. */
6192 if (target_flags != save_flags && ! current_function_interrupt)
6193 emit_insn (gen_toggle_sz ());
6196 int offset_base, offset;
6197 int offset_in_r0 = -1;
6199 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6200 save_schedule schedule;
6204 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6205 offset_base = -entry[1].offset + d_rounding;
6206 tmp_pnt = schedule.temps;
6207 for (; entry->mode != VOIDmode; entry--)
6209 enum machine_mode mode = entry->mode;
6210 int reg = entry->reg;
6211 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6213 offset = offset_base + entry->offset;
6214 reg_rtx = gen_rtx_REG (mode, reg);
6216 mem_rtx = gen_frame_mem (mode,
6217 gen_rtx_PLUS (Pmode,
6221 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6227 if (HAVE_POST_INCREMENT
6228 && (offset == offset_in_r0
6229 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6230 && mem_rtx == NULL_RTX)
6231 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6233 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6235 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6238 post_inc = NULL_RTX;
6247 if (mem_rtx != NULL_RTX)
6250 if (offset_in_r0 == -1)
6252 emit_move_insn (r0, GEN_INT (offset));
6253 offset_in_r0 = offset;
6255 else if (offset != offset_in_r0)
6260 GEN_INT (offset - offset_in_r0)));
6261 offset_in_r0 += offset - offset_in_r0;
6264 if (post_inc != NULL_RTX)
6270 (Pmode, r0, stack_pointer_rtx));
6276 offset_in_r0 += GET_MODE_SIZE (mode);
6279 mem_rtx = gen_frame_mem (mode, r0);
6281 mem_rtx = gen_frame_mem (mode,
6282 gen_rtx_PLUS (Pmode,
6286 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6287 || mem_rtx == post_inc);
6290 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6291 && mem_rtx != post_inc)
6293 insn = emit_move_insn (r0, mem_rtx);
6296 else if (TARGET_REGISTER_P (reg))
6298 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6300 /* Give the scheduler a bit of freedom by using up to
6301 MAX_TEMPS registers in a round-robin fashion. */
6302 insn = emit_move_insn (tmp_reg, mem_rtx);
6305 tmp_pnt = schedule.temps;
6308 insn = emit_move_insn (reg_rtx, mem_rtx);
6309 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6310 /* This is dead, unless we return with a sibcall. */
6311 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6316 gcc_assert (entry->offset + offset_base == d + d_rounding);
6318 else /* ! TARGET_SH5 */
6321 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6323 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6325 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6327 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6328 && hard_regs_intersect_p (&live_regs_mask,
6329 &reg_class_contents[DF_REGS]))
6331 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6333 if (j == FIRST_FP_REG && fpscr_deferred)
6338 if (target_flags != save_flags && ! current_function_interrupt)
6339 emit_insn (gen_toggle_sz ());
6340 target_flags = save_flags;
6342 output_stack_adjust (current_function_pretend_args_size
6343 + save_size + d_rounding
6344 + current_function_args_info.stack_regs * 8,
6345 stack_pointer_rtx, e, NULL);
6347 if (current_function_calls_eh_return)
6348 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6349 EH_RETURN_STACKADJ_RTX));
6351 /* Switch back to the normal stack if necessary. */
6352 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6353 emit_insn (gen_sp_switch_2 ());
6355 /* Tell flow the insn that pops PR isn't dead. */
6356 /* PR_REG will never be live in SHmedia mode, and we don't need to
6357 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6358 by the return pattern. */
6359 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6360 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6363 static int sh_need_epilogue_known = 0;
6366 sh_need_epilogue (void)
6368 if (! sh_need_epilogue_known)
6373 sh_expand_epilogue (0);
6374 epilogue = get_insns ();
6376 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6378 return sh_need_epilogue_known > 0;
6381 /* Emit code to change the current function's return address to RA.
6382 TEMP is available as a scratch register, if needed. */
6385 sh_set_return_address (rtx ra, rtx tmp)
6387 HARD_REG_SET live_regs_mask;
6389 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6392 d = calc_live_regs (&live_regs_mask);
6394 /* If pr_reg isn't live, we can set it (or the register given in
6395 sh_media_register_for_return) directly. */
6396 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6402 int rr_regno = sh_media_register_for_return ();
6407 rr = gen_rtx_REG (DImode, rr_regno);
6410 rr = gen_rtx_REG (SImode, pr_reg);
6412 emit_insn (GEN_MOV (rr, ra));
6413 /* Tell flow the register for return isn't dead. */
6414 emit_insn (gen_rtx_USE (VOIDmode, rr));
6421 save_schedule schedule;
6424 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6425 offset = entry[1].offset;
6426 for (; entry->mode != VOIDmode; entry--)
6427 if (entry->reg == pr_reg)
6430 /* We can't find the pr register.  */
6434 offset = entry->offset - offset;
6435 pr_offset = (rounded_frame_size (d) + offset
6436 + SHMEDIA_REGS_STACK_ADJUST ());
6439 pr_offset = rounded_frame_size (d);
6441 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6442 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6444 tmp = gen_frame_mem (Pmode, tmp);
6445 emit_insn (GEN_MOV (tmp, ra));
6448 /* Clear variables at function end. */
6451 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6452 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6454 sh_need_epilogue_known = 0;
6458 sh_builtin_saveregs (void)
6460 /* First unnamed integer register. */
6461 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6462 /* Number of integer registers we need to save. */
6463 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6464 /* First unnamed SFmode float reg */
6465 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6466 /* Number of SFmode float regs to save. */
6467 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6470 HOST_WIDE_INT alias_set;
6476 int pushregs = n_intregs;
6478 while (pushregs < NPARM_REGS (SImode) - 1
6479 && (CALL_COOKIE_INT_REG_GET
6480 (current_function_args_info.call_cookie,
6481 NPARM_REGS (SImode) - pushregs)
6484 current_function_args_info.call_cookie
6485 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6490 if (pushregs == NPARM_REGS (SImode))
6491 current_function_args_info.call_cookie
6492 |= (CALL_COOKIE_INT_REG (0, 1)
6493 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6495 current_function_args_info.call_cookie
6496 |= CALL_COOKIE_STACKSEQ (pushregs);
6498 current_function_pretend_args_size += 8 * n_intregs;
6500 if (TARGET_SHCOMPACT)
6504 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6506 error ("__builtin_saveregs not supported by this subtarget");
6513 /* Allocate block of memory for the regs. */
6514 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6515 Or can assign_stack_local accept a 0 SIZE argument? */
6516 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6519 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6520 else if (n_floatregs & 1)
6524 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6525 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6526 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6527 regbuf = change_address (regbuf, BLKmode, addr);
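/* The OR above forces bit 2 of the word-aligned address on, making
   ADDR an odd multiple of UNITS_PER_WORD; with an odd N_FLOATREGS,
   the end of the float area at addr + n_floatregs * 4 then falls on
   an 8-byte boundary, so the predecremented double saves stay
   aligned (a sketch of the intent; the extra word allocated above
   keeps the buffer large enough).  */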
6529 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6533 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6534 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6535 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6536 emit_insn (gen_andsi3 (addr, addr, mask));
6537 regbuf = change_address (regbuf, BLKmode, addr);
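/* The two instructions above compute addr = (addr + 4) & -8,
   rounding a word-aligned address up to the next 8-byte boundary
   (worked instance: 0x14 -> 0x18, while 0x10 stays 0x10); the extra
   UNITS_PER_WORD allocated above absorbs the rounding.  */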
6540 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6541 alias_set = get_varargs_alias_set ();
6542 set_mem_alias_set (regbuf, alias_set);
6545 /* Save int args.  This is optimized to only save the regs that are necessary.  Explicitly
6546 named args need not be saved. */
6548 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6549 adjust_address (regbuf, BLKmode,
6550 n_floatregs * UNITS_PER_WORD),
6554 /* Return the address of the regbuf. */
6555 return XEXP (regbuf, 0);
6558 /* Save float args.  This is optimized to only save the regs that are necessary.  Explicitly
6559 named args need not be saved.
6560 We explicitly build a pointer to the buffer because it halves the insn
6561 count when not optimizing (otherwise the pointer is built for each reg saved).
6563 We emit the moves in reverse order so that we can use predecrement. */
6565 fpregs = copy_to_mode_reg (Pmode,
6566 plus_constant (XEXP (regbuf, 0),
6567 n_floatregs * UNITS_PER_WORD));
6568 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6571 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6573 emit_insn (gen_addsi3 (fpregs, fpregs,
6574 GEN_INT (-2 * UNITS_PER_WORD)));
6575 mem = change_address (regbuf, DFmode, fpregs);
6576 emit_move_insn (mem,
6577 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6579 regno = first_floatreg;
6582 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6583 mem = change_address (regbuf, SFmode, fpregs);
6584 emit_move_insn (mem,
6585 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6586 - (TARGET_LITTLE_ENDIAN != 0)));
6590 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6594 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6595 mem = change_address (regbuf, SFmode, fpregs);
6596 emit_move_insn (mem,
6597 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6600 /* Return the address of the regbuf. */
6601 return XEXP (regbuf, 0);
6604 /* Define the `__builtin_va_list' type for the ABI. */
6607 sh_build_builtin_va_list (void)
6609 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6612 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6613 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6614 return ptr_type_node;
6616 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6618 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6620 f_next_o_limit = build_decl (FIELD_DECL,
6621 get_identifier ("__va_next_o_limit"),
6623 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6625 f_next_fp_limit = build_decl (FIELD_DECL,
6626 get_identifier ("__va_next_fp_limit"),
6628 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6631 DECL_FIELD_CONTEXT (f_next_o) = record;
6632 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6633 DECL_FIELD_CONTEXT (f_next_fp) = record;
6634 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6635 DECL_FIELD_CONTEXT (f_next_stack) = record;
6637 TYPE_FIELDS (record) = f_next_o;
6638 TREE_CHAIN (f_next_o) = f_next_o_limit;
6639 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6640 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6641 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6643 layout_type (record);
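/* Viewed as C, the record built above corresponds roughly to (a sketch;
   the precise field types come from the build_decl calls above):

     struct __va_list_tag
     {
       void *__va_next_o;         -- next integer register arg slot
       void *__va_next_o_limit;   -- end of the integer arg save area
       void *__va_next_fp;        -- next FP register arg slot
       void *__va_next_fp_limit;  -- end of the FP arg save area
       void *__va_next_stack;     -- next argument passed on the stack
     };  */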
6648 /* Implement `va_start' for varargs and stdarg. */
6651 sh_va_start (tree valist, rtx nextarg)
6653 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6654 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6660 expand_builtin_saveregs ();
6661 std_expand_builtin_va_start (valist, nextarg);
6665 if ((! TARGET_SH2E && ! TARGET_SH4)
6666 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6668 std_expand_builtin_va_start (valist, nextarg);
6672 f_next_o = TYPE_FIELDS (va_list_type_node);
6673 f_next_o_limit = TREE_CHAIN (f_next_o);
6674 f_next_fp = TREE_CHAIN (f_next_o_limit);
6675 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6676 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6678 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6680 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6681 valist, f_next_o_limit, NULL_TREE);
6682 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6684 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6685 valist, f_next_fp_limit, NULL_TREE);
6686 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6687 valist, f_next_stack, NULL_TREE);
6689 /* Call __builtin_saveregs. */
6690 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6691 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6692 TREE_SIDE_EFFECTS (t) = 1;
6693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6695 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6700 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6701 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6702 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6703 TREE_SIDE_EFFECTS (t) = 1;
6704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6706 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6707 TREE_SIDE_EFFECTS (t) = 1;
6708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6710 nint = current_function_args_info.arg_count[SH_ARG_INT];
6715 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6716 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6717 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6718 TREE_SIDE_EFFECTS (t) = 1;
6719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6721 u = make_tree (ptr_type_node, nextarg);
6722 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6723 TREE_SIDE_EFFECTS (t) = 1;
6724 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
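/* To summarize (a sketch; nfp and nint are the register counts computed
   above), sh_va_start leaves the fields as:

     next_fp       = regbuf
     next_fp_limit = regbuf + UNITS_PER_WORD * nfp
     next_o        = next_fp_limit
     next_o_limit  = next_o + UNITS_PER_WORD * nint
     next_stack    = nextarg  */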
6727 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6728 member, return it. */
6730 find_sole_member (tree type)
6732 tree field, member = NULL_TREE;
6734 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6736 if (TREE_CODE (field) != FIELD_DECL)
6738 if (!DECL_SIZE (field))
6740 if (integer_zerop (DECL_SIZE (field)))
6748 /* Implement `va_arg'. */
6751 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6752 tree *post_p ATTRIBUTE_UNUSED)
6754 HOST_WIDE_INT size, rsize;
6755 tree tmp, pptr_type_node;
6756 tree addr, lab_over = NULL, result = NULL;
6757 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6761 type = build_pointer_type (type);
6763 size = int_size_in_bytes (type);
6764 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6765 pptr_type_node = build_pointer_type (ptr_type_node);
6767 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6768 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6770 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6771 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6776 f_next_o = TYPE_FIELDS (va_list_type_node);
6777 f_next_o_limit = TREE_CHAIN (f_next_o);
6778 f_next_fp = TREE_CHAIN (f_next_o_limit);
6779 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6780 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6782 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6784 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6785 valist, f_next_o_limit, NULL_TREE);
6786 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6787 valist, f_next_fp, NULL_TREE);
6788 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6789 valist, f_next_fp_limit, NULL_TREE);
6790 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6791 valist, f_next_stack, NULL_TREE);
6793 /* Structures with a single member with a distinct mode are passed
6794 like their member. This is relevant if the latter has a REAL_TYPE
6795 or COMPLEX_TYPE type. */
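/* For example (a sketch):  struct A { float f; };  takes SFmode and is
   passed like a plain float, and a nested  struct B { struct A a; };
   is unwrapped recursively down to the float.  */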
6797 while (TREE_CODE (eff_type) == RECORD_TYPE
6798 && (member = find_sole_member (eff_type))
6799 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6800 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6801 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6803 tree field_type = TREE_TYPE (member);
6805 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6806 eff_type = field_type;
6809 gcc_assert ((TYPE_ALIGN (eff_type)
6810 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6811 || (TYPE_ALIGN (eff_type)
6812 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6819 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6820 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6821 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6826 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6829 addr = create_tmp_var (pptr_type_node, NULL);
6830 lab_false = create_artificial_label ();
6831 lab_over = create_artificial_label ();
6833 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6837 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6839 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6841 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6842 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6843 gimplify_and_add (tmp, pre_p);
6845 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6846 gimplify_and_add (tmp, pre_p);
6847 tmp = next_fp_limit;
6848 if (size > 4 && !is_double)
6849 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6850 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6851 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6852 cmp = build3 (COND_EXPR, void_type_node, tmp,
6853 build1 (GOTO_EXPR, void_type_node, lab_false),
6856 gimplify_and_add (cmp, pre_p);
6858 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6859 || (is_double || size == 16))
6861 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6862 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6863 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6864 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6865 gimplify_and_add (tmp, pre_p);
6868 gimplify_and_add (cmp, pre_p);
6870 #ifdef FUNCTION_ARG_SCmode_WART
6871 if (TYPE_MODE (eff_type) == SCmode
6872 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6874 tree subtype = TREE_TYPE (eff_type);
6878 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6879 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6882 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6883 real = get_initialized_tmp_var (real, pre_p, NULL);
6885 result = build2 (COMPLEX_EXPR, type, real, imag);
6886 result = get_initialized_tmp_var (result, pre_p, NULL);
6888 #endif /* FUNCTION_ARG_SCmode_WART */
6890 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6891 gimplify_and_add (tmp, pre_p);
6893 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6894 gimplify_and_add (tmp, pre_p);
6896 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6897 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6898 gimplify_and_add (tmp, pre_p);
6899 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6900 gimplify_and_add (tmp, pre_p);
6902 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6903 gimplify_and_add (tmp, post_p);
6904 valist = next_fp_tmp;
6908 tmp = fold_convert (ptr_type_node, size_int (rsize));
6909 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
6910 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6911 tmp = build3 (COND_EXPR, void_type_node, tmp,
6912 build1 (GOTO_EXPR, void_type_node, lab_false),
6914 gimplify_and_add (tmp, pre_p);
6916 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6917 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6918 gimplify_and_add (tmp, pre_p);
6920 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6921 gimplify_and_add (tmp, pre_p);
6923 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6924 gimplify_and_add (tmp, pre_p);
6926 if (size > 4 && ! TARGET_SH4)
6928 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6929 gimplify_and_add (tmp, pre_p);
6932 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6933 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6934 gimplify_and_add (tmp, pre_p);
6939 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6940 gimplify_and_add (tmp, pre_p);
6944 /* ??? In va-sh.h, there had been code to make values larger than
6945 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6947 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6950 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
6951 gimplify_and_add (tmp, pre_p);
6953 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6954 gimplify_and_add (tmp, pre_p);
6960 result = build_va_arg_indirect_ref (result);
6966 sh_promote_prototypes (tree type)
6972 return ! sh_attr_renesas_p (type);
6975 /* Whether an argument must be passed by reference. On SHcompact, we
6976 pretend arguments wider than 32 bits that would have been passed in
6977 registers are passed by reference, so that an SHmedia trampoline
6978 loads them into the full 64-bit registers.  */
6981 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6982 tree type, bool named)
6984 unsigned HOST_WIDE_INT size;
6987 size = int_size_in_bytes (type);
6989 size = GET_MODE_SIZE (mode);
6991 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6993 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6994 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6995 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6997 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6998 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7005 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7006 tree type, bool named)
7008 if (targetm.calls.must_pass_in_stack (mode, type))
7011 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7012 wants to know about pass-by-reference semantics for incoming
7017 if (TARGET_SHCOMPACT)
7019 cum->byref = shcompact_byref (cum, mode, type, named);
7020 return cum->byref != 0;
7027 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7028 tree type, bool named ATTRIBUTE_UNUSED)
7030 /* ??? How can it possibly be correct to return true only on the
7031 caller side of the equation? Is there someplace else in the
7032 sh backend that's magically producing the copies? */
7033 return (cum->outgoing
7034 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7035 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7039 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7040 tree type, bool named ATTRIBUTE_UNUSED)
7045 && PASS_IN_REG_P (*cum, mode, type)
7046 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7047 && (ROUND_REG (*cum, mode)
7049 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7050 : ROUND_ADVANCE (int_size_in_bytes (type)))
7051 > NPARM_REGS (mode)))
7052 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7054 else if (!TARGET_SHCOMPACT
7055 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7056 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7058 return words * UNITS_PER_WORD;
7062 /* Define where to put the arguments to a function.
7063 Value is zero to push the argument on the stack,
7064 or a hard register in which to store the argument.
7066 MODE is the argument's machine mode.
7067 TYPE is the data type of the argument (as a tree).
7068 This is null for libcalls where that information may
7069 not be available.
7070 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7071 the preceding args and about the function being called.
7072 NAMED is nonzero if this argument is a named parameter
7073 (otherwise it is an extra parameter matching an ellipsis).
7075 On SH the first args are normally in registers
7076 and the rest are pushed. Any arg that starts within the first
7077 NPARM_REGS words is at least partially passed in a register unless
7078 its data type forbids. */
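/* For example (an illustrative sketch for SH4, default non-Renesas ABI,
   little endian):

     int f (int a, int b, float c, float d);

   passes a in r4 and b in r5, while the SFmode register-pairing swap
   below places c in fr5 and d in fr4.  */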
7082 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7083 tree type, int named)
7085 if (! TARGET_SH5 && mode == VOIDmode)
7086 return GEN_INT (ca->renesas_abi ? 1 : 0);
7089 && PASS_IN_REG_P (*ca, mode, type)
7090 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7094 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7095 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7097 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7098 gen_rtx_REG (SFmode,
7100 + (ROUND_REG (*ca, mode) ^ 1)),
7102 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7103 gen_rtx_REG (SFmode,
7105 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7107 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7110 /* If the alignment of a DF value causes an SF register to be
7111 skipped, we will use that skipped register for the next SF
7112 value.  */
7113 if ((TARGET_HITACHI || ca->renesas_abi)
7114 && ca->free_single_fp_reg
7116 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7118 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7119 ^ (mode == SFmode && TARGET_SH4
7120 && TARGET_LITTLE_ENDIAN != 0
7121 && ! TARGET_HITACHI && ! ca->renesas_abi);
7122 return gen_rtx_REG (mode, regno);
7128 if (mode == VOIDmode && TARGET_SHCOMPACT)
7129 return GEN_INT (ca->call_cookie);
7131 /* The following test assumes unnamed arguments are promoted to
7132 DFmode.  */
7133 if (mode == SFmode && ca->free_single_fp_reg)
7134 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7136 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7137 && (named || ! ca->prototype_p)
7138 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7140 if (! ca->prototype_p && TARGET_SHMEDIA)
7141 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7143 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7145 + ca->arg_count[(int) SH_ARG_FLOAT]);
7148 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7149 && (! TARGET_SHCOMPACT
7150 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7151 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7154 return gen_rtx_REG (mode, (FIRST_PARM_REG
7155 + ca->arg_count[(int) SH_ARG_INT]));
7164 /* Update the data in CUM to advance over an argument
7165 of mode MODE and data type TYPE.
7166 (TYPE is null for libcalls where that information may not be
7167 available.)
7170 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7171 tree type, int named)
7175 else if (TARGET_SH5)
7177 tree type2 = (ca->byref && type
7180 enum machine_mode mode2 = (ca->byref && type
7183 int dwords = ((ca->byref
7186 ? int_size_in_bytes (type2)
7187 : GET_MODE_SIZE (mode2)) + 7) / 8;
7188 int numregs = MIN (dwords, NPARM_REGS (SImode)
7189 - ca->arg_count[(int) SH_ARG_INT]);
7193 ca->arg_count[(int) SH_ARG_INT] += numregs;
7194 if (TARGET_SHCOMPACT
7195 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7198 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7200 /* N.B. We want this also for outgoing. */
7201 ca->stack_regs += numregs;
7206 ca->stack_regs += numregs;
7207 ca->byref_regs += numregs;
7211 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7215 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7218 else if (dwords > numregs)
7220 int pushregs = numregs;
7222 if (TARGET_SHCOMPACT)
7223 ca->stack_regs += numregs;
7224 while (pushregs < NPARM_REGS (SImode) - 1
7225 && (CALL_COOKIE_INT_REG_GET
7227 NPARM_REGS (SImode) - pushregs)
7231 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7235 if (numregs == NPARM_REGS (SImode))
7237 |= CALL_COOKIE_INT_REG (0, 1)
7238 | CALL_COOKIE_STACKSEQ (numregs - 1);
7241 |= CALL_COOKIE_STACKSEQ (numregs);
7244 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7245 && (named || ! ca->prototype_p))
7247 if (mode2 == SFmode && ca->free_single_fp_reg)
7248 ca->free_single_fp_reg = 0;
7249 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7250 < NPARM_REGS (SFmode))
7253 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7255 - ca->arg_count[(int) SH_ARG_FLOAT]);
7257 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7259 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7261 if (ca->outgoing && numregs > 0)
7265 |= (CALL_COOKIE_INT_REG
7266 (ca->arg_count[(int) SH_ARG_INT]
7267 - numregs + ((numfpregs - 2) / 2),
7268 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7271 while (numfpregs -= 2);
7273 else if (mode2 == SFmode && (named)
7274 && (ca->arg_count[(int) SH_ARG_FLOAT]
7275 < NPARM_REGS (SFmode)))
7276 ca->free_single_fp_reg
7277 = FIRST_FP_PARM_REG - numfpregs
7278 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7284 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7286 /* Note that we've used the skipped register. */
7287 if (mode == SFmode && ca->free_single_fp_reg)
7289 ca->free_single_fp_reg = 0;
7292 /* When we have a DF after an SF, there's an SF register that gets
7293 skipped in order to align the DF value. We note this skipped
7294 register, because the next SF value will use it, and not the
7295 SF that follows the DF. */
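/* For example (a sketch, Hitachi/Renesas ABI with a double-precision
   FPU):  void f (float a, double b, float c);  passes a in fr4; b must
   start on an even register and lands in fr6/fr7, skipping fr5; c then
   reuses the skipped fr5 recorded here.  */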
7297 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7299 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7300 + BASE_ARG_REG (mode));
7304 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7305 || PASS_IN_REG_P (*ca, mode, type))
7306 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7307 = (ROUND_REG (*ca, mode)
7309 ? ROUND_ADVANCE (int_size_in_bytes (type))
7310 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7313 /* The Renesas calling convention doesn't quite fit into this scheme since
7314 the address is passed like an invisible argument, but one that is always
7315 passed in memory. */
7317 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7319 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7321 return gen_rtx_REG (Pmode, 2);
7324 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7327 sh_return_in_memory (tree type, tree fndecl)
7331 if (TYPE_MODE (type) == BLKmode)
7332 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7334 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7338 return (TYPE_MODE (type) == BLKmode
7339 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7340 && TREE_CODE (type) == RECORD_TYPE));
7344 /* We actually emit the code in sh_expand_prologue. We used to use
7345 a static variable to flag that we need to emit this code, but that
7346 doesn't work when inlining, when functions are deferred and then emitted
7347 later. Fortunately, we already have two flags that are part of struct
7348 function that tell if a function uses varargs or stdarg. */
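/* For example (a sketch):  int f (int a, ...)  has its named 'a' in r4,
   leaving the r5..r7 argument registers anonymous, so *pretend_arg_size
   is set to 3 * 4 = 12 below and the prologue saves those three
   registers next to the incoming stack arguments.  */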
7350 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7351 enum machine_mode mode,
7353 int *pretend_arg_size,
7354 int second_time ATTRIBUTE_UNUSED)
7356 gcc_assert (current_function_stdarg);
7357 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7359 int named_parm_regs, anon_parm_regs;
7361 named_parm_regs = (ROUND_REG (*ca, mode)
7363 ? ROUND_ADVANCE (int_size_in_bytes (type))
7364 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7365 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7366 if (anon_parm_regs > 0)
7367 *pretend_arg_size = anon_parm_regs * 4;
7372 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7378 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7380 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7384 /* Define the offset between two registers, one to be eliminated, and
7385 the other its replacement, at the start of a routine. */
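/* A sketch of the regions these offsets add up to (stack grows
   downward; the exact ordering of the regions is the prologue's
   business):

       incoming stack args           <- arg pointer
       byref argument copies (SH5)
       saved registers (+ rounding)
       local (auto) space            <- hard frame pointer / stack pointer
                                        (their initial gap is 0)

   so eliminating the arg pointer to either of them yields
   total_saved_regs_space + total_auto_space + byref_regs * 8.  */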
7388 initial_elimination_offset (int from, int to)
7391 int regs_saved_rounding = 0;
7392 int total_saved_regs_space;
7393 int total_auto_space;
7394 int save_flags = target_flags;
7396 HARD_REG_SET live_regs_mask;
7398 shmedia_space_reserved_for_target_registers = false;
7399 regs_saved = calc_live_regs (&live_regs_mask);
7400 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7402 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7404 shmedia_space_reserved_for_target_registers = true;
7405 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7408 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7409 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7410 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7412 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7413 copy_flags = target_flags;
7414 target_flags = save_flags;
7416 total_saved_regs_space = regs_saved + regs_saved_rounding;
7418 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7419 return total_saved_regs_space + total_auto_space
7420 + current_function_args_info.byref_regs * 8;
7422 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7423 return total_saved_regs_space + total_auto_space
7424 + current_function_args_info.byref_regs * 8;
7426 /* Initial gap between fp and sp is 0. */
7427 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7430 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7431 return rounded_frame_size (0);
7433 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7434 return rounded_frame_size (0);
7436 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7437 && (to == HARD_FRAME_POINTER_REGNUM
7438 || to == STACK_POINTER_REGNUM));
7441 int n = total_saved_regs_space;
7442 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7443 save_schedule schedule;
7446 n += total_auto_space;
7448 /* If it wasn't saved, there's not much we can do. */
7449 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7452 target_flags = copy_flags;
7454 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7455 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7456 if (entry->reg == pr_reg)
7458 target_flags = save_flags;
7459 return entry->offset;
7464 return total_auto_space;
7467 /* Insert any deferred function attributes from earlier pragmas. */
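/* For example (a sketch):

     #pragma interrupt
     void isr (void);

   the pragma defers an interrupt_handler attribute, which this hook
   then attaches to isr when its declaration is processed.  */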
7469 sh_insert_attributes (tree node, tree *attributes)
7473 if (TREE_CODE (node) != FUNCTION_DECL)
7476 /* We are only interested in fields. */
7480 /* Append the attributes to the deferred attributes. */
7481 *sh_deferred_function_attributes_tail = *attributes;
7482 attrs = sh_deferred_function_attributes;
7486 /* Some attributes imply or require the interrupt attribute. */
7487 if (!lookup_attribute ("interrupt_handler", attrs)
7488 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7490 /* If we have a trapa_handler, but no interrupt_handler attribute,
7491 insert an interrupt_handler attribute. */
7492 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7493 /* We can't use sh_pr_interrupt here because that's not in the
7494 java frontend.  */
7495 attrs
7496 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7497 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7498 interrupt attribute is missing, we ignore the attribute and warn. */
7499 else if (lookup_attribute ("sp_switch", attrs)
7500 || lookup_attribute ("trap_exit", attrs)
7501 || lookup_attribute ("nosave_low_regs", attrs))
7505 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7507 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7508 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7509 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7510 warning (OPT_Wattributes,
7511 "%qs attribute only applies to interrupt functions",
7512 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7515 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7517 tail = &TREE_CHAIN (*tail);
7520 attrs = *attributes;
7524 /* Install the processed list. */
7525 *attributes = attrs;
7527 /* Clear deferred attributes. */
7528 sh_deferred_function_attributes = NULL_TREE;
7529 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7534 /* Supported attributes:
7536 interrupt_handler -- specifies this function is an interrupt handler.
7538 trapa_handler -- like the above, but don't save all registers.
7540 sp_switch -- specifies an alternate stack for an interrupt handler
7541 to run on.
7543 trap_exit -- use a trapa to exit an interrupt function instead of
7544 an rte instruction.
7546 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7547 This is useful on the SH3 and upwards,
7548 which have a separate set of low regs for User and Supervisor modes.
7549 This should only be used for the lowest level of interrupts.  Higher levels
7550 of interrupts must save the registers in case they themselves are
7551 interrupted.
7553 renesas -- use Renesas calling/layout conventions (functions and
7554 structures).
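/* Example uses (a sketch; "alt_stack" and 11 are illustrative values):

     void h1 (void) __attribute__ ((interrupt_handler));
     void h2 (void) __attribute__ ((interrupt_handler,
                                    sp_switch ("alt_stack"),
                                    trap_exit (11)));
     int rf (int x) __attribute__ ((renesas));

   sp_switch takes a string constant and trap_exit an integer constant,
   as checked by the handlers below.  */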
7558 const struct attribute_spec sh_attribute_table[] =
7560 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7561 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7562 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7563 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7564 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7565 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7566 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7568 /* Symbian support adds three new attributes:
7569 dllexport - for exporting a function/variable that will live in a dll
7570 dllimport - for importing a function/variable from a dll
7572 Microsoft allows multiple declspecs in one __declspec, separating
7573 them with spaces.  We do NOT support this.  Instead, use __declspec
7574 multiple times.
7575 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7576 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7578 { NULL, 0, 0, false, false, false, NULL }
7581 /* Handle an "interrupt_handler" attribute; arguments as in
7582 struct attribute_spec.handler. */
7584 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7585 tree args ATTRIBUTE_UNUSED,
7586 int flags ATTRIBUTE_UNUSED,
7589 if (TREE_CODE (*node) != FUNCTION_DECL)
7591 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7592 IDENTIFIER_POINTER (name));
7593 *no_add_attrs = true;
7595 else if (TARGET_SHCOMPACT)
7597 error ("attribute interrupt_handler is not compatible with -m5-compact");
7598 *no_add_attrs = true;
7604 /* Handle an "sp_switch" attribute; arguments as in
7605 struct attribute_spec.handler. */
7607 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7608 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7610 if (TREE_CODE (*node) != FUNCTION_DECL)
7612 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7613 IDENTIFIER_POINTER (name));
7614 *no_add_attrs = true;
7616 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7618 /* The argument must be a constant string. */
7619 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7620 IDENTIFIER_POINTER (name));
7621 *no_add_attrs = true;
7627 /* Handle a "trap_exit" attribute; arguments as in
7628 struct attribute_spec.handler. */
7630 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7631 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7633 if (TREE_CODE (*node) != FUNCTION_DECL)
7635 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7636 IDENTIFIER_POINTER (name));
7637 *no_add_attrs = true;
7639 /* The argument specifies a trap number to be used in a trapa instruction
7640 at function exit (instead of an rte instruction). */
7641 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7643 /* The argument must be a constant integer. */
7644 warning (OPT_Wattributes, "%qs attribute argument not an "
7645 "integer constant", IDENTIFIER_POINTER (name));
7646 *no_add_attrs = true;
7653 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7654 tree name ATTRIBUTE_UNUSED,
7655 tree args ATTRIBUTE_UNUSED,
7656 int flags ATTRIBUTE_UNUSED,
7657 bool *no_add_attrs ATTRIBUTE_UNUSED)
7662 /* True if __attribute__((renesas)) or -mrenesas. */
7664 sh_attr_renesas_p (tree td)
7671 td = TREE_TYPE (td);
7672 if (td == error_mark_node)
7674 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7678 /* True if __attribute__((renesas)) or -mrenesas, for the current
7679 function.  */
7681 sh_cfun_attr_renesas_p (void)
7683 return sh_attr_renesas_p (current_function_decl);
7687 sh_cfun_interrupt_handler_p (void)
7689 return (lookup_attribute ("interrupt_handler",
7690 DECL_ATTRIBUTES (current_function_decl))
7694 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7697 sh_check_pch_target_flags (int old_flags)
7699 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7700 | MASK_SH_E | MASK_HARD_SH4
7701 | MASK_FPU_SINGLE | MASK_SH4))
7702 return _("created and used with different architectures / ABIs");
7703 if ((old_flags ^ target_flags) & MASK_HITACHI)
7704 return _("created and used with different ABIs");
7705 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7706 return _("created and used with different endianness");
7710 /* Predicates used by the templates. */
7712 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7713 Used only in general_movsrc_operand. */
7716 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7728 /* Nonzero if OP is a floating point value with value 0.0. */
7731 fp_zero_operand (rtx op)
7735 if (GET_MODE (op) != SFmode)
7738 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7739 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7742 /* Nonzero if OP is a floating point value with value 1.0. */
7745 fp_one_operand (rtx op)
7749 if (GET_MODE (op) != SFmode)
7752 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7753 return REAL_VALUES_EQUAL (r, dconst1);
7756 /* For -m4 and -m4-single-only, mode switching is used. If we are
7757 compiling without -mfmovd, movsf_ie isn't taken into account for
7758 mode switching. We could check in machine_dependent_reorg for
7759 cases where we know we are in single precision mode, but there is
7760 no interface to find that out during reload, so we must avoid
7761 choosing an fldi alternative during reload and thus failing to
7762 allocate a scratch register for the constant loading. */
7766 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7770 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7772 enum rtx_code code = GET_CODE (op);
7773 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7776 /* Return the TLS type for TLS symbols, 0 otherwise.  */
7778 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7780 if (GET_CODE (op) != SYMBOL_REF)
7782 return SYMBOL_REF_TLS_MODEL (op);
7785 /* Return the destination address of a branch. */
7788 branch_dest (rtx branch)
7790 rtx dest = SET_SRC (PATTERN (branch));
7793 if (GET_CODE (dest) == IF_THEN_ELSE)
7794 dest = XEXP (dest, 1);
7795 dest = XEXP (dest, 0);
7796 dest_uid = INSN_UID (dest);
7797 return INSN_ADDRESSES (dest_uid);
7800 /* Return nonzero if REG is not used after INSN.
7801 We assume REG is a reload reg, and therefore does
7802 not live past labels. It may live past calls or jumps though. */
7804 reg_unused_after (rtx reg, rtx insn)
7809 /* If the reg is set by this instruction, then it is safe for our
7810 case. Disregard the case where this is a store to memory, since
7811 we are checking a register used in the store address. */
7812 set = single_set (insn);
7813 if (set && GET_CODE (SET_DEST (set)) != MEM
7814 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7817 while ((insn = NEXT_INSN (insn)))
7823 code = GET_CODE (insn);
7826 /* If this is a label that existed before reload, then the register
7827 is dead here.  However, if this is a label added by reorg, then
7828 the register may still be live here. We can't tell the difference,
7829 so we just ignore labels completely. */
7830 if (code == CODE_LABEL)
7835 if (code == JUMP_INSN)
7838 /* If this is a sequence, we must handle them all at once.
7839 We could have for instance a call that sets the target register,
7840 and an insn in a delay slot that uses the register. In this case,
7841 we must return 0. */
7842 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7847 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7849 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7850 rtx set = single_set (this_insn);
7852 if (GET_CODE (this_insn) == CALL_INSN)
7854 else if (GET_CODE (this_insn) == JUMP_INSN)
7856 if (INSN_ANNULLED_BRANCH_P (this_insn))
7861 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7863 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7865 if (GET_CODE (SET_DEST (set)) != MEM)
7871 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7876 else if (code == JUMP_INSN)
7880 set = single_set (insn);
7881 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7883 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7884 return GET_CODE (SET_DEST (set)) != MEM;
7885 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7888 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7896 static GTY(()) rtx fpscr_rtx;
7898 get_fpscr_rtx (void)
7902 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7903 REG_USERVAR_P (fpscr_rtx) = 1;
7904 mark_user_reg (fpscr_rtx);
7906 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7907 mark_user_reg (fpscr_rtx);
7911 static GTY(()) tree fpscr_values;
7914 emit_fpu_switch (rtx scratch, int index)
7918 if (fpscr_values == NULL)
7922 t = build_index_type (integer_one_node);
7923 t = build_array_type (integer_type_node, t);
7924 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7925 DECL_ARTIFICIAL (t) = 1;
7926 DECL_IGNORED_P (t) = 1;
7927 DECL_EXTERNAL (t) = 1;
7928 TREE_STATIC (t) = 1;
7929 TREE_PUBLIC (t) = 1;
7935 src = DECL_RTL (fpscr_values);
7938 emit_move_insn (scratch, XEXP (src, 0));
7940 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7941 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7944 src = adjust_address (src, PSImode, index * 4);
7946 dst = get_fpscr_rtx ();
7947 emit_move_insn (dst, src);
7951 emit_sf_insn (rtx pat)
7957 emit_df_insn (rtx pat)
7963 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7965 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7969 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7971 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7976 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7978 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7982 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7984 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7988 /* ??? gcc does flow analysis strictly after common subexpression
7989 elimination. As a result, common subexpression elimination fails
7990 when there are some intervening statements setting the same register.
7991 If we did nothing about this, this would hurt the precision switching
7992 for SH4 badly. There is some cse after reload, but it is unable to
7993 undo the extra register pressure from the unused instructions, and
7994 it cannot remove auto-increment loads.
7996 A C code example that shows this flow/cse weakness for (at least) SH
7997 and sparc (as of gcc ss-970706) is this:
8011 So we add another pass before common subexpression elimination, to
8012 remove assignments that are dead due to a following assignment in the
8013 same basic block. */
8016 mark_use (rtx x, rtx *reg_set_block)
8022 code = GET_CODE (x);
8027 int regno = REGNO (x);
8028 int nregs = (regno < FIRST_PSEUDO_REGISTER
8029 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8033 reg_set_block[regno + nregs - 1] = 0;
8040 rtx dest = SET_DEST (x);
8042 if (GET_CODE (dest) == SUBREG)
8043 dest = SUBREG_REG (dest);
8044 if (GET_CODE (dest) != REG)
8045 mark_use (dest, reg_set_block);
8046 mark_use (SET_SRC (x), reg_set_block);
8053 const char *fmt = GET_RTX_FORMAT (code);
8055 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8058 mark_use (XEXP (x, i), reg_set_block);
8059 else if (fmt[i] == 'E')
8060 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8061 mark_use (XVECEXP (x, i, j), reg_set_block);
8068 static rtx get_free_reg (HARD_REG_SET);
8070 /* This function returns a register to use for loading the address
8071 from which the fpscr is loaded.  Currently it always returns r1 or r7, but when we are
8072 able to use pseudo registers after combine, or have a better mechanism
8073 for choosing a register, it should be done here. */
8074 /* REGS_LIVE is the liveness information for the point for which we
8075 need this allocation. In some bare-bones exit blocks, r1 is live at the
8076 start. We can even have all of r0..r3 being live:
8077 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8078 INSN before which new insns are placed will clobber the register
8079 we return.  If a basic block consists only of setting the return value
8080 register to a pseudo and using that register, the return value is not
8081 live before or after this block, yet we'll insert our insns right in
8085 get_free_reg (HARD_REG_SET regs_live)
8087 if (! TEST_HARD_REG_BIT (regs_live, 1))
8088 return gen_rtx_REG (Pmode, 1);
8090 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8091 there shouldn't be anything but a jump before the function end. */
8092 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8093 return gen_rtx_REG (Pmode, 7);
8096 /* This function will set the fpscr from memory.
8097 MODE is the mode we are setting it to. */
8099 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8101 enum attr_fp_mode fp_mode = mode;
8102 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8103 rtx addr_reg = get_free_reg (regs_live);
8105 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8108 /* Is the given character a logical line separator for the assembler? */
8109 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8110 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8114 sh_insn_length_adjustment (rtx insn)
8116 /* Instructions with unfilled delay slots take up an extra two bytes for
8117 the nop in the delay slot. */
8118 if (((GET_CODE (insn) == INSN
8119 && GET_CODE (PATTERN (insn)) != USE
8120 && GET_CODE (PATTERN (insn)) != CLOBBER)
8121 || GET_CODE (insn) == CALL_INSN
8122 || (GET_CODE (insn) == JUMP_INSN
8123 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8124 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8125 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8126 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8129 /* SH2e has a bug that prevents the use of annulled branches, so if
8130 the delay slot is not filled, we'll have to put a NOP in it. */
8131 if (sh_cpu == CPU_SH2E
8132 && GET_CODE (insn) == JUMP_INSN
8133 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8134 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8135 && get_attr_type (insn) == TYPE_CBRANCH
8136 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8139 /* sh-dsp parallel processing insns take four bytes instead of two.  */
8141 if (GET_CODE (insn) == INSN)
8144 rtx body = PATTERN (insn);
8145 const char *template;
8147 int maybe_label = 1;
8149 if (GET_CODE (body) == ASM_INPUT)
8150 template = XSTR (body, 0);
8151 else if (asm_noperands (body) >= 0)
8153 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8162 while (c == ' ' || c == '\t');
8163 /* all sh-dsp parallel-processing insns start with p.
8164 The only non-ppi sh insn starting with p is pref.
8165 The only ppi starting with pr is prnd. */
8166 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8168 /* The repeat pseudo-insn expands to three insns, a total of
8169 six bytes in size. */
8170 else if ((c == 'r' || c == 'R')
8171 && ! strncasecmp ("epeat", template, 5))
8173 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8175 /* If this is a label, it is obviously not a ppi insn. */
8176 if (c == ':' && maybe_label)
8181 else if (c == '\'' || c == '"')
8186 maybe_label = c != ':';
8194 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8195 isn't protected by a PIC unspec. */
8197 nonpic_symbol_mentioned_p (rtx x)
8199 register const char *fmt;
8202 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8203 || GET_CODE (x) == PC)
8206 /* We don't want to look into the possible MEM location of a
8207 CONST_DOUBLE, since we're not going to use it, in general. */
8208 if (GET_CODE (x) == CONST_DOUBLE)
8211 if (GET_CODE (x) == UNSPEC
8212 && (XINT (x, 1) == UNSPEC_PIC
8213 || XINT (x, 1) == UNSPEC_GOT
8214 || XINT (x, 1) == UNSPEC_GOTOFF
8215 || XINT (x, 1) == UNSPEC_GOTPLT
8216 || XINT (x, 1) == UNSPEC_GOTTPOFF
8217 || XINT (x, 1) == UNSPEC_DTPOFF
8218 || XINT (x, 1) == UNSPEC_PLT))
8221 fmt = GET_RTX_FORMAT (GET_CODE (x));
8222 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8228 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8229 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8232 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8239 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8240 @GOTOFF in `reg'. */
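/* A sketch of the resulting accesses (r12 is the PIC register holding
   the GOT base on SH; the mnemonics are illustrative):

     label or local symbol S:  reg = S@GOTOFF + r12
     other symbol S:           reg = *(r12 + S@GOT)   -- loaded from the GOT  */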
8242 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8245 if (tls_symbolic_operand (orig, Pmode))
8248 if (GET_CODE (orig) == LABEL_REF
8249 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8252 reg = gen_reg_rtx (Pmode);
8254 emit_insn (gen_symGOTOFF2reg (reg, orig));
8257 else if (GET_CODE (orig) == SYMBOL_REF)
8260 reg = gen_reg_rtx (Pmode);
8262 emit_insn (gen_symGOT2reg (reg, orig));
8268 /* Mark the use of a constant in the literal table. If the constant
8269 has multiple labels, make it unique. */
8271 mark_constant_pool_use (rtx x)
8273 rtx insn, lab, pattern;
8278 switch (GET_CODE (x))
8288 /* Get the first label in the list of labels for the same constant
8289 and delete the other labels in the list.  */
8291 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8293 if (GET_CODE (insn) != CODE_LABEL
8294 || LABEL_REFS (insn) != NEXT_INSN (insn))
8299 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8300 INSN_DELETED_P (insn) = 1;
8302 /* Mark constants in a window. */
8303 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8305 if (GET_CODE (insn) != INSN)
8308 pattern = PATTERN (insn);
8309 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8312 switch (XINT (pattern, 1))
8314 case UNSPECV_CONST2:
8315 case UNSPECV_CONST4:
8316 case UNSPECV_CONST8:
8317 XVECEXP (pattern, 0, 1) = const1_rtx;
8319 case UNSPECV_WINDOW_END:
8320 if (XVECEXP (pattern, 0, 0) == x)
8323 case UNSPECV_CONST_END:
8333 /* Return true if it's possible to redirect BRANCH1 to the destination
8334 of an unconditional jump BRANCH2. We only want to do this if the
8335 resulting branch will have a short displacement. */
8337 sh_can_redirect_branch (rtx branch1, rtx branch2)
8339 if (flag_expensive_optimizations && simplejump_p (branch2))
8341 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8345 for (distance = 0, insn = NEXT_INSN (branch1);
8346 insn && distance < 256;
8347 insn = PREV_INSN (insn))
8352 distance += get_attr_length (insn);
8354 for (distance = 0, insn = NEXT_INSN (branch1);
8355 insn && distance < 256;
8356 insn = NEXT_INSN (insn))
8361 distance += get_attr_length (insn);
8367 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8369 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8370 unsigned int new_reg)
8372 /* Interrupt functions can only use registers that have already been
8373 saved by the prologue, even if they would normally be
8374 call-clobbered.  */
8376 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8382 /* Function to update the integer COST
8383 based on the relationship between INSN that is dependent on
8384 DEP_INSN through the dependence LINK. The default is to make no
8385 adjustment to COST. This can be used for example to specify to
8386 the scheduler that an output- or anti-dependence does not incur
8387 the same cost as a data-dependence. The return value should be
8388 the new value for COST. */
8390 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8396 /* On SHmedia, if the dependence is an anti-dependence or
8397 output-dependence, there is no cost. */
8398 if (REG_NOTE_KIND (link) != 0)
8400 /* However, dependencies between target register loads and
8401 uses of the register in a subsequent block that are separated
8402 by a conditional branch are not modelled -- we have to make do with
8403 the anti-dependency between the target register load and the
8404 conditional branch that ends the current block. */
8405 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8406 && GET_CODE (PATTERN (dep_insn)) == SET
8407 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8408 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8409 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8411 int orig_cost = cost;
8412 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8413 rtx target = ((! note
8414 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8415 ? insn : JUMP_LABEL (insn));
8416 /* On the likely path, the branch costs 1, on the unlikely path,
8420 target = next_active_insn (target);
8421 while (target && ! flow_dependent_p (target, dep_insn)
8423 /* If two branches are executed in immediate succession, with the
8424 first branch properly predicted, this causes a stall at the
8425 second branch, hence we won't need the target for the
8426 second branch for two cycles after the launch of the first
8427 branch.  */
8428 if (cost > orig_cost - 2)
8429 cost = orig_cost - 2;
8435 else if (get_attr_is_mac_media (insn)
8436 && get_attr_is_mac_media (dep_insn))
8439 else if (! reload_completed
8440 && GET_CODE (PATTERN (insn)) == SET
8441 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8442 && GET_CODE (PATTERN (dep_insn)) == SET
8443 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8446 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8447 that is needed at the target. */
8448 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8449 && ! flow_dependent_p (insn, dep_insn))
8452 else if (REG_NOTE_KIND (link) == 0)
8454 enum attr_type dep_type, type;
8456 if (recog_memoized (insn) < 0
8457 || recog_memoized (dep_insn) < 0)
8460 dep_type = get_attr_type (dep_insn);
8461 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8463 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8464 && (type = get_attr_type (insn)) != TYPE_CALL
8465 && type != TYPE_SFUNC)
8468 /* The only input for a call that is timing-critical is the
8469 function's address. */
8470 if (GET_CODE(insn) == CALL_INSN)
8472 rtx call = PATTERN (insn);
8474 if (GET_CODE (call) == PARALLEL)
8475 call = XVECEXP (call, 0, 0);
8476 if (GET_CODE (call) == SET)
8477 call = SET_SRC (call);
8478 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8479 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8480 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8481 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8484 /* Likewise, the most timing critical input for an sfuncs call
8485 is the function address. However, sfuncs typically start
8486 using their arguments pretty quickly.
8487 Assume a four cycle delay before they are needed. */
8488 /* All sfunc calls are parallels with at least four components.
8489 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8490 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8491 && XVECLEN (PATTERN (insn), 0) >= 4
8492 && (reg = sfunc_uses_reg (insn)))
8494 if (! reg_set_p (reg, dep_insn))
8497 /* When the preceding instruction loads the shift amount of
8498 the following SHAD/SHLD, the latency of the load is increased
8499 by 1 cycle.  */
8501 && get_attr_type (insn) == TYPE_DYN_SHIFT
8502 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8503 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8504 XEXP (SET_SRC (single_set (insn)),
8507 /* When an LS group instruction with a latency of less than
8508 3 cycles is followed by a double-precision floating-point
8509 instruction, FIPR, or FTRV, the latency of the first
8510 instruction is increased to 3 cycles. */
8512 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8513 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8515 /* The lsw register of a double-precision computation is ready one
8516 cycle earlier.  */
8517 else if (reload_completed
8518 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8519 && (use_pat = single_set (insn))
8520 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8524 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8525 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8528 /* An anti-dependence penalty of two applies if the first insn is a double
8529 precision fadd / fsub / fmul. */
8530 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8531 && recog_memoized (dep_insn) >= 0
8532 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8533 /* A lot of alleged anti-flow dependences are fake,
8534 so check this one is real. */
8535 && flow_dependent_p (dep_insn, insn))
8542 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8543 if DEP_INSN is anti-flow dependent on INSN. */
8545 flow_dependent_p (rtx insn, rtx dep_insn)
8547 rtx tmp = PATTERN (insn);
8549 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8550 return tmp == NULL_RTX;
8553 /* A helper function for flow_dependent_p called through note_stores. */
8555 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8557 rtx * pinsn = (rtx *) data;
8559 if (*pinsn && reg_referenced_p (x, *pinsn))
8563 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8564 'special function' patterns (type sfunc) that clobber pr, but that
8565 do not look like function calls to leaf_function_p. Hence we must
8566 do this extra check. */
8570 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8573 /* Return where to allocate pseudo for a given hard register initial
8574 value.  */
8576 sh_allocate_initial_value (rtx hard_reg)
8580 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8582 if (current_function_is_leaf
8583 && ! sh_pr_n_sets ()
8584 && ! (TARGET_SHCOMPACT
8585 && ((current_function_args_info.call_cookie
8586 & ~ CALL_COOKIE_RET_TRAMP (1))
8587 || current_function_has_nonlocal_label)))
8590 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8598 /* This function returns "2" to indicate dual issue for the SH4
8599 processor. To be used by the DFA pipeline description. */
8601 sh_issue_rate (void)
8603 if (TARGET_SUPERSCALAR)
8609 /* Functions for ready queue reordering for sched1. */
8611 /* Get weight for mode for a set x. */
8613 find_set_regmode_weight (rtx x, enum machine_mode mode)
8615 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8617 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8619 if (GET_CODE (SET_DEST (x)) == REG)
8621 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8631 /* Get regmode weight for insn. */
8633 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8635 short reg_weight = 0;
8638 /* Increment weight for each register born here. */
8640 reg_weight += find_set_regmode_weight (x, mode);
8641 if (GET_CODE (x) == PARALLEL)
8644 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8646 x = XVECEXP (PATTERN (insn), 0, j);
8647 reg_weight += find_set_regmode_weight (x, mode);
8650 /* Decrement weight for each register that dies here. */
8651 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8653 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8655 rtx note = XEXP (x, 0);
8656 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8663 /* Calculate regmode weights for all insns of a basic block. */
8665 find_regmode_weight (int b, enum machine_mode mode)
8667 rtx insn, next_tail, head, tail;
8669 get_block_head_tail (b, &head, &tail);
8670 next_tail = NEXT_INSN (tail);
8672 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8674 /* Handle register life information. */
8679 INSN_REGMODE_WEIGHT (insn, mode) =
8680 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8681 else if (mode == SImode)
8682 INSN_REGMODE_WEIGHT (insn, mode) =
8683 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8687 /* Comparison function for ready queue sorting. */
8689 rank_for_reorder (const void *x, const void *y)
8691 rtx tmp = *(const rtx *) y;
8692 rtx tmp2 = *(const rtx *) x;
8694 /* The insn in a schedule group should be issued first.  */
8695 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8696 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8698 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8699 minimizes instruction movement, thus minimizing sched's effect on
8700 register pressure. */
8701 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8704 /* Resort the array A in which only the element at index N may be out of order.  */
8706 swap_reorder (rtx *a, int n)
8708 rtx insn = a[n - 1];
8711 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8719 #define SCHED_REORDER(READY, N_READY) \
8720 do \
8721 { \
8722 if ((N_READY) == 2) \
8723 swap_reorder (READY, N_READY); \
8724 else if ((N_READY) > 2) \
8725 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8726 } \
8727 while (0)
8729 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8730 macro.  */
8732 ready_reorder (rtx *ready, int nready)
8734 SCHED_REORDER (ready, nready);
8737 /* Calculate regmode weights for all insns of all basic blocks.  */
8739 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8740 int verbose ATTRIBUTE_UNUSED,
8745 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8746 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8748 FOR_EACH_BB_REVERSE (b)
8750 find_regmode_weight (b->index, SImode);
8751 find_regmode_weight (b->index, SFmode);
8754 CURR_REGMODE_PRESSURE (SImode) = 0;
8755 CURR_REGMODE_PRESSURE (SFmode) = 0;
8761 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8762 int verbose ATTRIBUTE_UNUSED)
8764 if (regmode_weight[0])
8766 free (regmode_weight[0]);
8767 regmode_weight[0] = NULL;
8769 if (regmode_weight[1])
8771 free (regmode_weight[1]);
8772 regmode_weight[1] = NULL;
8776 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8777 keep count of register pressures on SImode and SFmode. */
8779 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8780 int sched_verbose ATTRIBUTE_UNUSED,
8784 if (GET_CODE (PATTERN (insn)) != USE
8785 && GET_CODE (PATTERN (insn)) != CLOBBER)
8786 cached_can_issue_more = can_issue_more - 1;
8788 cached_can_issue_more = can_issue_more;
8790 if (reload_completed)
8791 return cached_can_issue_more;
8793 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8794 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8796 return cached_can_issue_more;
8800 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8801 int verbose ATTRIBUTE_UNUSED,
8802 int veclen ATTRIBUTE_UNUSED)
8804 CURR_REGMODE_PRESSURE (SImode) = 0;
8805 CURR_REGMODE_PRESSURE (SFmode) = 0;
8808 /* Some magic numbers. */
8809 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8810 functions that already have high pressure on r0.  */
8811 #define R0_MAX_LIFE_REGIONS 2
8812 #define R0_MAX_LIVE_LENGTH 12
8813 /* Register Pressure thresholds for SImode and SFmode registers. */
8814 #define SIMODE_MAX_WEIGHT 5
8815 #define SFMODE_MAX_WEIGHT 10
8817 /* Return true if the pressure is high for MODE. */
8819 high_pressure (enum machine_mode mode)
8821 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8822 functions that already have high pressure on r0. */
8823 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8824 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8828 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8830 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
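/* Worked example (illustrative): with SIMODE_MAX_WEIGHT == 5, five
   scheduled insns of SImode weight 1 each leave CURR_REGMODE_PRESSURE
   (SImode) at exactly 5, which is not yet "high"; a sixth pushes it to
   6 > 5, and sh_reorder below then re-sorts the ready list by LUID to
   minimize further insn movement. */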
8833 /* Reorder ready queue if register pressure is high. */
8835 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8836 int sched_verbose ATTRIBUTE_UNUSED,
8839 int clock_var ATTRIBUTE_UNUSED)
8841 if (reload_completed)
8842 return sh_issue_rate ();
8844 if (high_pressure (SFmode) || high_pressure (SImode))
8846 ready_reorder (ready, *n_readyp);
8849 return sh_issue_rate ();
8852 /* Skip cycles if the current register pressure is high. */
8854 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8855 int sched_verbose ATTRIBUTE_UNUSED,
8856 rtx *ready ATTRIBUTE_UNUSED,
8857 int *n_readyp ATTRIBUTE_UNUSED,
8858 int clock_var ATTRIBUTE_UNUSED)
8860 if (reload_completed)
8861 return cached_can_issue_more;
8863 if (high_pressure(SFmode) || high_pressure (SImode))
8866 return cached_can_issue_more;
8869 /* Skip cycles without sorting the ready queue.  This will move insns from
8870 Q -> R.  If this is the last cycle we are skipping, allow sorting of the
8871 ready queue by sh_reorder. */
8873 /* Generally, skipping this many cycles is sufficient for all insns to move
8874 from Q -> R. */
8875 #define MAX_SKIPS 8
8878 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8879 int sched_verbose ATTRIBUTE_UNUSED,
8880 rtx insn ATTRIBUTE_UNUSED,
8885 if (reload_completed)
8890 if ((clock_var - last_clock_var) < MAX_SKIPS)
8895 /* If this is the last cycle we are skipping, allow reordering of R. */
8896 if ((clock_var - last_clock_var) == MAX_SKIPS)
8908 /* SHmedia requires registers for branches, so we can't generate new
8909 branches past reload. */
8911 sh_cannot_modify_jumps_p (void)
8913 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8917 sh_target_reg_class (void)
8919 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8923 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8928 if (! shmedia_space_reserved_for_target_registers)
8930 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8932 if (calc_live_regs (&dummy) >= 6 * 8)
8934 /* This is a borderline case.  See if we have a nested loop, or a loop
8935 with a call, or with more than 4 labels inside. */
8936 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8938 if (GET_CODE (insn) == NOTE
8939 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8945 insn = NEXT_INSN (insn);
8946 if ((GET_CODE (insn) == NOTE
8947 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8948 || GET_CODE (insn) == CALL_INSN
8949 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8952 while (GET_CODE (insn) != NOTE
8953 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8960 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8962 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8966 /* On the SH1..SH4, the trampoline looks like
8967 2 0002 D202 mov.l l2,r2
8968 1 0000 D301 mov.l l1,r3
8969 3 0004 422B jmp @r2
8970 4 0006 0009 nop
8971 5 0008 00000000 l1: .long area
8972 6 000c 00000000 l2: .long function
8974 SH5 (compact) uses r1 instead of r3 for the static chain. */
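/* Illustrative sketch (hypothetical helper, not GCC code) of how the two
   SImode constants used for the SH1..SH4 case below pack the 16-bit
   opcodes above, with endianness deciding which insn lands in the low
   halfword.  Note that both PC-relative mov.l insns round the PC down to
   the same longword, so either ordering of the two movs decodes to the
   same pool slots. */
#if 0
#include <stdint.h>

/* Pack two 16-bit SH opcodes into one 32-bit trampoline word; FIRST is
   the insn to be placed at the lower address. */
static uint32_t
pack_tramp_word (uint16_t first, uint16_t second, int little_endian)
{
  return little_endian
	 ? ((uint32_t) second << 16) | first
	 : ((uint32_t) first << 16) | second;
}

/* pack_tramp_word (0xd202, 0xd301, 1) == 0xd301d202 and
   pack_tramp_word (0x422b, 0x0009, 1) == 0x0009422b, matching the
   little-endian constants stored by sh_initialize_trampoline below. */
#endif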
8977 /* Emit RTL insns to initialize the variable parts of a trampoline.
8978 FNADDR is an RTX for the address of the function's pure code.
8979 CXT is an RTX for the static chain value for the function. */
8982 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8984 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8986 if (TARGET_SHMEDIA64)
8991 rtx movi1 = GEN_INT (0xcc000010);
8992 rtx shori1 = GEN_INT (0xc8000010);
8995 /* The following trampoline works within a +- 128 KB range for cxt:
8996 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8997 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8998 gettr tr1,r1; blink tr0,r63 */
8999 /* Address rounding makes it hard to compute the exact bounds of the
9000 offset for this trampoline, but we have a rather generous offset
9001 range, so frame_offset should do fine as an upper bound. */
9002 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9004 /* ??? could optimize this trampoline initialization
9005 by writing DImode words with two insns each. */
9006 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9007 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9008 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9009 insn = gen_rtx_AND (DImode, insn, mask);
9010 /* Or in ptb/u .,tr1 pattern */
9011 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9012 insn = force_operand (insn, NULL_RTX);
9013 insn = gen_lowpart (SImode, insn);
9014 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9015 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9016 insn = gen_rtx_AND (DImode, insn, mask);
9017 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9018 insn = gen_lowpart (SImode, insn);
9019 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9020 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9021 insn = gen_rtx_AND (DImode, insn, mask);
9022 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9023 insn = gen_lowpart (SImode, insn);
9024 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9025 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9026 insn = gen_rtx_AND (DImode, insn, mask);
9027 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9028 insn = gen_lowpart (SImode, insn);
9029 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9030 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9031 insn = gen_rtx_AND (DImode, insn, mask);
9032 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9033 insn = gen_lowpart (SImode, insn);
9034 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9035 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9036 GEN_INT (0x6bf10600));
9037 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9038 GEN_INT (0x4415fc10));
9039 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9040 GEN_INT (0x4401fff0));
9041 emit_insn (gen_ic_invalidate_line (tramp));
9044 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9045 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9047 tramp_templ = gen_datalabel_ref (tramp_templ);
9049 src = gen_const_mem (BLKmode, tramp_templ);
9050 set_mem_align (dst, 256);
9051 set_mem_align (src, 64);
9052 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9054 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9055 emit_move_insn (adjust_address (tramp_mem, Pmode,
9056 fixed_len + GET_MODE_SIZE (Pmode)),
9058 emit_insn (gen_ic_invalidate_line (tramp));
9061 else if (TARGET_SHMEDIA)
9063 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9064 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9065 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9066 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9067 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
9068 rotated 10 right, and the higher 16 bits of every 32 selected. */
9070 = force_reg (V2HImode, (simplify_gen_subreg
9071 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9072 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9073 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9075 tramp = force_reg (Pmode, tramp);
9076 fnaddr = force_reg (SImode, fnaddr);
9077 cxt = force_reg (SImode, cxt);
9078 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9079 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9081 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9082 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9083 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9084 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9085 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9086 gen_rtx_SUBREG (V2HImode, cxt, 0),
9088 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9089 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9090 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9091 if (TARGET_LITTLE_ENDIAN)
9093 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9094 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9098 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9099 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9101 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9102 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9103 emit_insn (gen_ic_invalidate_line (tramp));
9106 else if (TARGET_SHCOMPACT)
9108 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9111 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9112 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9114 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9115 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9117 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9118 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9121 if (TARGET_USERMODE)
9122 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9124 0, VOIDmode, 1, tramp, SImode);
9126 emit_insn (gen_ic_invalidate_line (tramp));
9130 /* FIXME: This is overly conservative. A SHcompact function that
9131 receives arguments ``by reference'' will have them stored in its
9132 own stack frame, so it must not pass pointers or references to
9133 these arguments to other functions by means of sibling calls. */
9134 /* If PIC, we cannot make sibling calls to global functions
9135 because the PLT requires r12 to be live. */
9137 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9140 && (! TARGET_SHCOMPACT
9141 || current_function_args_info.stack_regs == 0)
9142 && ! sh_cfun_interrupt_handler_p ()
9144 || (decl && ! TREE_PUBLIC (decl))
9145 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9148 /* Machine specific built-in functions. */
9150 struct builtin_description
9152 const enum insn_code icode;
9153 const char *const name;
9157 /* describe number and signedness of arguments; arg[0] == result
9158 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
9159 /* 9: 64 bit pointer, 10: 32 bit pointer.  (A small decoding sketch
   follows the signature codes below.) */
9160 static const char signature_args[][4] =
9162 #define SH_BLTIN_V2SI2 0
9164 #define SH_BLTIN_V4HI2 1
9166 #define SH_BLTIN_V2SI3 2
9168 #define SH_BLTIN_V4HI3 3
9170 #define SH_BLTIN_V8QI3 4
9172 #define SH_BLTIN_MAC_HISI 5
9174 #define SH_BLTIN_SH_HI 6
9176 #define SH_BLTIN_SH_SI 7
9178 #define SH_BLTIN_V4HI2V2SI 8
9180 #define SH_BLTIN_V4HI2V8QI 9
9182 #define SH_BLTIN_SISF 10
9184 #define SH_BLTIN_LDUA_L 11
9186 #define SH_BLTIN_LDUA_Q 12
9188 #define SH_BLTIN_STUA_L 13
9190 #define SH_BLTIN_STUA_Q 14
9192 #define SH_BLTIN_LDUA_L64 15
9194 #define SH_BLTIN_LDUA_Q64 16
9196 #define SH_BLTIN_STUA_L64 17
9198 #define SH_BLTIN_STUA_Q64 18
9200 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9201 #define SH_BLTIN_2 19
9202 #define SH_BLTIN_SU 19
9204 #define SH_BLTIN_3 20
9205 #define SH_BLTIN_SUS 20
9207 #define SH_BLTIN_PSSV 21
9209 #define SH_BLTIN_XXUU 22
9210 #define SH_BLTIN_UUUU 22
9212 #define SH_BLTIN_PV 23
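/* Standalone sketch (hypothetical helper, not GCC code) decoding one
   signature_args-style entry per the encoding comment above. */
#if 0
static const char *
describe_arg (int code)
{
  switch (code)
    {
    case 0:  return "no argument";
    case 1:  return "unsigned";
    case 2:  return "signed";
    case 4:  return "don't care";
    case 8:  return "pointer";
    case 9:  return "64-bit pointer";
    case 10: return "32-bit pointer";
    default: return "unknown";
    }
}
#endif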
9215 /* mcmv: operands considered unsigned. */
9216 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9217 /* mperm: control value considered unsigned int. */
9218 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9219 /* mshards_q: returns signed short. */
9220 /* nsb: takes long long arg, returns unsigned char. */
9221 static const struct builtin_description bdesc[] =
9223 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9224 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9225 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9226 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9227 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9228 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9229 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9230 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9231 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9232 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9233 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9234 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9235 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9236 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9237 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9238 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9239 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9240 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9241 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9242 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9243 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9244 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9246 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9247 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9248 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9249 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9250 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9251 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9252 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9253 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9254 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9255 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9256 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9257 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9258 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9259 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9260 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9261 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9262 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9263 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9264 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9265 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9266 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9267 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9268 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9269 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9270 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9271 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9272 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9273 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9274 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9275 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9276 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9277 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9278 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9279 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9280 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9281 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9282 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9283 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9284 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9285 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9286 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9287 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9288 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9289 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9290 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9291 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9292 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9293 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9294 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9295 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9296 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9297 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9298 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9299 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9300 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9301 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9302 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9303 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9304 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9305 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9306 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
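/* Illustrative user-level use of one of the builtins registered above;
   a sketch for SHmedia targets only, and the vector typedef is an
   assumption based on the V4HImode operands of addv4hi3:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add4 (v4hi a, v4hi b)
     {
       return __builtin_addv4hi3 (a, b);
     }
*/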
9310 sh_media_init_builtins (void)
9312 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9313 const struct builtin_description *d;
9315 memset (shared, 0, sizeof shared);
9316 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9318 tree type, arg_type = 0;
9319 int signature = d->signature;
9322 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9323 type = shared[signature];
9326 int has_result = signature_args[signature][0] != 0;
9328 if ((signature_args[signature][1] & 8)
9329 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9330 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9332 if (! TARGET_FPU_ANY
9333 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9335 type = void_list_node;
9338 int arg = signature_args[signature][i];
9339 int opno = i - 1 + has_result;
9342 arg_type = ptr_type_node;
9344 arg_type = (*lang_hooks.types.type_for_mode)
9345 (insn_data[d->icode].operand[opno].mode,
9350 arg_type = void_type_node;
9353 type = tree_cons (NULL_TREE, arg_type, type);
9355 type = build_function_type (arg_type, type);
9356 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9357 shared[signature] = type;
9359 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9364 /* Implements target hook vector_mode_supported_p. */
9366 sh_vector_mode_supported_p (enum machine_mode mode)
9369 && ((mode == V2SFmode)
9370 || (mode == V4SFmode)
9371 || (mode == V16SFmode)))
9374 else if (TARGET_SHMEDIA
9375 && ((mode == V8QImode)
9376 || (mode == V2HImode)
9377 || (mode == V4HImode)
9378 || (mode == V2SImode)))
9384 /* Implements target hook dwarf_calling_convention. Return an enum
9385 of dwarf_calling_convention. */
9387 sh_dwarf_calling_convention (tree func)
9389 if (sh_attr_renesas_p (func))
9390 return DW_CC_GNU_renesas_sh;
9392 return DW_CC_normal;
9396 sh_init_builtins (void)
9399 sh_media_init_builtins ();
9402 /* Expand an expression EXP that calls a built-in function,
9403 with result going to TARGET if that's convenient
9404 (and in mode MODE if that's convenient).
9405 SUBTARGET may be used as the target for computing one of EXP's operands.
9406 IGNORE is nonzero if the value is to be ignored. */
9409 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9410 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9412 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9413 tree arglist = TREE_OPERAND (exp, 1);
9414 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9415 const struct builtin_description *d = &bdesc[fcode];
9416 enum insn_code icode = d->icode;
9417 int signature = d->signature;
9418 enum machine_mode tmode = VOIDmode;
9423 if (signature_args[signature][0])
9428 tmode = insn_data[icode].operand[0].mode;
9430 || GET_MODE (target) != tmode
9431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9432 target = gen_reg_rtx (tmode);
9438 for (i = 1; i <= 3; i++, nop++)
9441 enum machine_mode opmode, argmode;
9444 if (! signature_args[signature][i])
9446 arg = TREE_VALUE (arglist);
9447 if (arg == error_mark_node)
9449 arglist = TREE_CHAIN (arglist);
9450 if (signature_args[signature][i] & 8)
9453 optype = ptr_type_node;
9457 opmode = insn_data[icode].operand[nop].mode;
9458 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9460 argmode = TYPE_MODE (TREE_TYPE (arg));
9461 if (argmode != opmode)
9462 arg = build1 (NOP_EXPR, optype, arg);
9463 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9464 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9465 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9471 pat = (*insn_data[d->icode].genfun) (op[0]);
9474 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9477 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9480 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9492 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9494 rtx sel0 = const0_rtx;
9495 rtx sel1 = const1_rtx;
9496 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9497 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9499 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9500 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9504 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9506 rtx sel0 = const0_rtx;
9507 rtx sel1 = const1_rtx;
9508 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9510 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9512 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9513 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
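/* Illustrative: sh_expand_binop_v2sf (PLUS, dst, a, b) emits two
   single-precision adds, one per V2SF lane, with the sel operands
   picking which element of each vector the lane reads and writes. */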
9516 /* Return the class of registers for which a mode change from FROM to TO
9517 is invalid. */
9519 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9520 enum reg_class class)
9522 /* We want to enable the use of SUBREGs as a means to
9523 VEC_SELECT a single element of a vector. */
9524 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9525 return (reg_classes_intersect_p (GENERAL_REGS, class));
9527 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9529 if (TARGET_LITTLE_ENDIAN)
9531 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9532 return reg_classes_intersect_p (DF_REGS, class);
9536 if (GET_MODE_SIZE (from) < 8)
9537 return reg_classes_intersect_p (DF_HI_REGS, class);
9544 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9545 that label is used. */
9548 sh_mark_label (rtx address, int nuses)
9550 if (GOTOFF_P (address))
9552 /* Extract the label or symbol. */
9553 address = XEXP (address, 0);
9554 if (GET_CODE (address) == PLUS)
9555 address = XEXP (address, 0);
9556 address = XVECEXP (address, 0, 0);
9558 if (GET_CODE (address) == LABEL_REF
9559 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9560 LABEL_NUSES (XEXP (address, 0)) += nuses;
9563 /* Compute extra cost of moving data between one register class and
9564 another. */
9566 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9567 uses this information. Hence, the general register <-> floating point
9568 register information here is not used for SFmode. */
9571 sh_register_move_cost (enum machine_mode mode,
9572 enum reg_class srcclass, enum reg_class dstclass)
9574 if (dstclass == T_REGS || dstclass == PR_REGS)
9577 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9580 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9581 && REGCLASS_HAS_FP_REG (srcclass)
9582 && REGCLASS_HAS_FP_REG (dstclass))
9585 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9586 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9588 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9589 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9592 if ((REGCLASS_HAS_FP_REG (dstclass)
9593 && REGCLASS_HAS_GENERAL_REG (srcclass))
9594 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9595 && REGCLASS_HAS_FP_REG (srcclass)))
9596 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9597 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9599 if ((dstclass == FPUL_REGS
9600 && REGCLASS_HAS_GENERAL_REG (srcclass))
9601 || (srcclass == FPUL_REGS
9602 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9605 if ((dstclass == FPUL_REGS
9606 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9607 || (srcclass == FPUL_REGS
9608 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9611 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9612 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9615 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9617 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9619 if (sh_gettrcost >= 0)
9620 return sh_gettrcost;
9621 else if (!TARGET_PT_FIXED)
9625 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9626 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9631 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9632 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9633 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9635 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
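/* Worked example (illustrative): on SH4 without -mfmovd, moving a
   DFmode value between general and floating-point registers costs
   12 * ((8 + 7) / 8) == 12, while the fall-back SImode move just above
   costs 2 * ((4 + 3) / 4) == 2, so reload strongly prefers moves within
   a single register file. */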
9638 static rtx emit_load_ptr (rtx, rtx);
9641 emit_load_ptr (rtx reg, rtx addr)
9643 rtx mem = gen_const_mem (ptr_mode, addr);
9645 if (Pmode != ptr_mode)
9646 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9647 return emit_move_insn (reg, mem);
9651 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9652 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9655 CUMULATIVE_ARGS cum;
9656 int structure_value_byref = 0;
9657 rtx this, this_value, sibcall, insns, funexp;
9658 tree funtype = TREE_TYPE (function);
9659 int simple_add = CONST_OK_FOR_ADD (delta);
9661 rtx scratch0, scratch1, scratch2;
9664 reload_completed = 1;
9665 epilogue_completed = 1;
9667 current_function_uses_only_leaf_regs = 1;
9668 reset_block_changes ();
9670 emit_note (NOTE_INSN_PROLOGUE_END);
9672 /* Find the "this" pointer. We have such a wide range of ABIs for the
9673 SH that it's best to do this completely machine independently.
9674 "this" is passed as first argument, unless a structure return pointer
9675 comes first, in which case "this" comes second. */
9676 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9677 #ifndef PCC_STATIC_STRUCT_RETURN
9678 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9679 structure_value_byref = 1;
9680 #endif /* not PCC_STATIC_STRUCT_RETURN */
9681 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9683 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9685 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9687 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9689 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9690 static chain pointer (even if you can't have nested virtual functions
9691 right now, someone might implement them sometime), and the rest of the
9692 registers are used for argument passing, are callee-saved, or reserved. */
9693 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9694 -ffixed-reg has been used. */
9695 if (! call_used_regs[0] || fixed_regs[0])
9696 error ("r0 needs to be available as a call-clobbered register");
9697 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9700 if (call_used_regs[1] && ! fixed_regs[1])
9701 scratch1 = gen_rtx_REG (ptr_mode, 1);
9702 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9703 pointing where to return struct values. */
9704 if (call_used_regs[3] && ! fixed_regs[3])
9705 scratch2 = gen_rtx_REG (Pmode, 3);
9707 else if (TARGET_SHMEDIA)
9709 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9710 if (i != REGNO (scratch0) &&
9711 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9713 scratch1 = gen_rtx_REG (ptr_mode, i);
9716 if (scratch1 == scratch0)
9717 error ("Need a second call-clobbered general purpose register");
9718 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9719 if (call_used_regs[i] && ! fixed_regs[i])
9721 scratch2 = gen_rtx_REG (Pmode, i);
9724 if (scratch2 == scratch0)
9725 error ("Need a call-clobbered target register");
9728 this_value = plus_constant (this, delta);
9730 && (simple_add || scratch0 != scratch1)
9731 && strict_memory_address_p (ptr_mode, this_value))
9733 emit_load_ptr (scratch0, this_value);
9739 else if (simple_add)
9740 emit_move_insn (this, this_value);
9743 emit_move_insn (scratch1, GEN_INT (delta));
9744 emit_insn (gen_add2_insn (this, scratch1));
9752 emit_load_ptr (scratch0, this);
9754 offset_addr = plus_constant (scratch0, vcall_offset);
9755 if (strict_memory_address_p (ptr_mode, offset_addr))
9757 else if (! TARGET_SH5 && scratch0 != scratch1)
9759 /* scratch0 != scratch1, and we have indexed loads. Get better
9760 schedule by loading the offset into r1 and using an indexed
9761 load - then the load of r1 can issue before the load from
9762 (this + delta) finishes. */
9763 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9764 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9766 else if (CONST_OK_FOR_ADD (vcall_offset))
9768 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9769 offset_addr = scratch0;
9771 else if (scratch0 != scratch1)
9773 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9774 emit_insn (gen_add2_insn (scratch0, scratch1));
9775 offset_addr = scratch0;
9778 gcc_unreachable (); /* FIXME */
9779 emit_load_ptr (scratch0, offset_addr);
9781 if (Pmode != ptr_mode)
9782 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9783 emit_insn (gen_add2_insn (this, scratch0));
9786 /* Generate a tail call to the target function. */
9787 if (! TREE_USED (function))
9789 assemble_external (function);
9790 TREE_USED (function) = 1;
9792 funexp = XEXP (DECL_RTL (function), 0);
9793 /* If the function is overridden, so is the thunk, hence we don't
9794 need GOT addressing even if this is a public symbol. */
9796 if (TARGET_SH1 && ! flag_weak)
9797 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9800 if (TARGET_SH2 && flag_pic)
9802 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9803 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9807 if (TARGET_SHMEDIA && flag_pic)
9809 funexp = gen_sym2PIC (funexp);
9810 PUT_MODE (funexp, Pmode);
9812 emit_move_insn (scratch2, funexp);
9813 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9814 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9816 sibcall = emit_call_insn (sibcall);
9817 SIBLING_CALL_P (sibcall) = 1;
9818 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9821 /* Run just enough of rest_of_compilation to do scheduling and get
9822 the insns emitted. Note that use_thunk calls
9823 assemble_start_function and assemble_end_function. */
9825 insn_locators_initialize ();
9826 insns = get_insns ();
9830 /* Initialize the bitmap obstacks. */
9831 bitmap_obstack_initialize (NULL);
9832 bitmap_obstack_initialize (&reg_obstack);
9835 rtl_register_cfg_hooks ();
9836 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9837 init_rtl_bb_info (EXIT_BLOCK_PTR);
9838 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9839 EXIT_BLOCK_PTR->flags |= BB_RTL;
9840 find_basic_blocks (insns);
9842 if (flag_schedule_insns_after_reload)
9844 life_analysis (PROP_FINAL);
9846 split_all_insns (1);
9850 /* We must split jmp insn in PIC case. */
9852 split_all_insns_noflow ();
9857 if (optimize > 0 && flag_delayed_branch)
9858 dbr_schedule (insns);
9860 shorten_branches (insns);
9861 final_start_function (insns, file, 1);
9862 final (insns, file, 1);
9863 final_end_function ();
9867 /* Release all memory allocated by flow. */
9868 free_basic_block_vars ();
9870 /* Release the bitmap obstacks. */
9871 bitmap_obstack_release (&reg_obstack);
9872 bitmap_obstack_release (NULL);
9875 reload_completed = 0;
9876 epilogue_completed = 0;
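/* Net effect of the thunk emitted above, as a plain C sketch
   (illustrative only; the pointer-difference type is an assumption). */
#if 0
static void *
thunk_adjust (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;		/* constant adjustment */
  if (vcall_offset)
    {
      char *vtbl = *(char **) p;		/* load the vtable pointer */
      p += *(long *) (vtbl + vcall_offset);	/* add the vtable-stored delta */
    }
  return p;	/* adjusted `this'; control then tail-calls FUNCTION */
}
#endif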
9881 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9885 /* If this is not an ordinary function, the name usually comes from a
9886 string literal or an sprintf buffer. Make sure we use the same
9887 string consistently, so that cse will be able to unify address loads. */
9888 if (kind != FUNCTION_ORDINARY)
9889 name = IDENTIFIER_POINTER (get_identifier (name));
9890 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9891 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9895 case FUNCTION_ORDINARY:
9899 rtx reg = target ? target : gen_reg_rtx (Pmode);
9901 emit_insn (gen_symGOT2reg (reg, sym));
9907 /* ??? To allow cse to work, we use GOTOFF relocations.
9908 We could add combiner patterns to transform this into
9909 straight pc-relative calls with sym2PIC / bsrf when
9910 label load and function call are still 1:1 and in the
9911 same basic block during combine. */
9912 rtx reg = target ? target : gen_reg_rtx (Pmode);
9914 emit_insn (gen_symGOTOFF2reg (reg, sym));
9919 if (target && sym != target)
9921 emit_move_insn (target, sym);
9927 /* Find the number of a general purpose register in S. */
9929 scavenge_reg (HARD_REG_SET *s)
9932 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9933 if (TEST_HARD_REG_BIT (*s, r))
9939 sh_get_pr_initial_val (void)
9943 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9944 PR register on SHcompact, because it might be clobbered by the prologue.
9945 We check first if that is known to be the case. */
9946 if (TARGET_SHCOMPACT
9947 && ((current_function_args_info.call_cookie
9948 & ~ CALL_COOKIE_RET_TRAMP (1))
9949 || current_function_has_nonlocal_label))
9950 return gen_frame_mem (SImode, return_address_pointer_rtx);
9952 /* If we haven't finished rtl generation, there might be a nonlocal label
9953 that we haven't seen yet.
9954 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9955 is set, unless it has been called before for the same register. And even
9956 then, we end up in trouble if we didn't use the register in the same
9957 basic block before. So call get_hard_reg_initial_val now and wrap it
9958 in an unspec if we might need to replace it. */
9959 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9960 combine can put the pseudo returned by get_hard_reg_initial_val into
9961 instructions that need a general purpose registers, which will fail to
9962 be recognized when the pseudo becomes allocated to PR. */
9964 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9966 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9971 sh_expand_t_scc (enum rtx_code code, rtx target)
9973 rtx result = target;
9976 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9977 || GET_CODE (sh_compare_op1) != CONST_INT)
9979 if (GET_CODE (result) != REG)
9980 result = gen_reg_rtx (SImode);
9981 val = INTVAL (sh_compare_op1);
9982 if ((code == EQ && val == 1) || (code == NE && val == 0))
9983 emit_insn (gen_movt (result));
9984 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9986 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9987 emit_insn (gen_subc (result, result, result));
9988 emit_insn (gen_addsi3 (result, result, const1_rtx));
9990 else if (code == EQ || code == NE)
9991 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9994 if (result != target)
9995 emit_move_insn (target, result);
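/* Worked example (illustrative): for EQ with val == 0 (or NE with
   val == 1) the sequence above computes !T without a branch: subc sets
   result = result - result - T = -T, and adding 1 yields 1 - T, i.e.,
   1 when T is clear and 0 when T is set. */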
9999 /* INSN is an sfunc; return the rtx that describes the address used. */
10001 extract_sfunc_addr (rtx insn)
10003 rtx pattern, part = NULL_RTX;
10006 pattern = PATTERN (insn);
10007 len = XVECLEN (pattern, 0);
10008 for (i = 0; i < len; i++)
10010 part = XVECEXP (pattern, 0, i);
10011 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10012 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10013 return XEXP (part, 0);
10015 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10016 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10019 /* Verify that the register in use_sfunc_addr still agrees with the address
10020 used in the sfunc.  This prevents fill_slots_from_thread from changing
10021 use_sfunc_addr.
10022 INSN is the use_sfunc_addr instruction, and REG is the register it
10023 guards. */
10025 check_use_sfunc_addr (rtx insn, rtx reg)
10027 /* Search for the sfunc. It should really come right after INSN. */
10028 while ((insn = NEXT_INSN (insn)))
10030 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10032 if (! INSN_P (insn))
10035 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10036 insn = XVECEXP (PATTERN (insn), 0, 0);
10037 if (GET_CODE (PATTERN (insn)) != PARALLEL
10038 || get_attr_type (insn) != TYPE_SFUNC)
10040 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10042 gcc_unreachable ();
10045 /* This function returns a constant rtx that represents 2**15 / pi in
10046 SFmode.  It's used to scale SFmode angles, in radians, to a
10047 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10048 maps to 0x10000). */
10050 static GTY(()) rtx sh_fsca_sf2int_rtx;
10053 sh_fsca_sf2int (void)
10055 if (! sh_fsca_sf2int_rtx)
10057 REAL_VALUE_TYPE rv;
10059 real_from_string (&rv, "10430.378350470453");
10060 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10063 return sh_fsca_sf2int_rtx;
10066 /* This function returns a constant rtx that represents 2**15 / pi in
10067 DFmode.  It's used to scale DFmode angles, in radians, to a
10068 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10069 maps to 0x10000). */
10071 static GTY(()) rtx sh_fsca_df2int_rtx;
10074 sh_fsca_df2int (void)
10076 if (! sh_fsca_df2int_rtx)
10078 REAL_VALUE_TYPE rv;
10080 real_from_string (&rv, "10430.378350470453");
10081 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10084 return sh_fsca_df2int_rtx;
10087 /* This function returns a constant rtx that represents pi / 2**15 in
10088 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10089 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10090 2*pi. */
10092 static GTY(()) rtx sh_fsca_int2sf_rtx;
10095 sh_fsca_int2sf (void)
10097 if (! sh_fsca_int2sf_rtx)
10099 REAL_VALUE_TYPE rv;
10101 real_from_string (&rv, "9.587379924285257e-5");
10102 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10105 return sh_fsca_int2sf_rtx;
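/* Worked check of the two constants above (illustrative):
   2**15 / pi = 32768 / 3.14159265358979... = 10430.378350470453, whose
   reciprocal is pi / 2**15 = 9.587379924285257e-5.  Multiplying an
   angle in radians by the former maps 2*pi exactly onto 0x10000, the
   full-circle unit fsca expects; the latter maps it back. */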
10108 /* Initialize the CUMULATIVE_ARGS structure. */
10111 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10113 rtx libname ATTRIBUTE_UNUSED,
10115 signed int n_named_args,
10116 enum machine_mode mode)
10118 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10119 pcum->free_single_fp_reg = 0;
10120 pcum->stack_regs = 0;
10121 pcum->byref_regs = 0;
10123 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10125 /* XXX - Should we check TARGET_HITACHI here ??? */
10126 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10130 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10131 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10132 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10133 pcum->arg_count [(int) SH_ARG_INT]
10134 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10137 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10138 && pcum->arg_count [(int) SH_ARG_INT] == 0
10139 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10140 ? int_size_in_bytes (TREE_TYPE (fntype))
10141 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10142 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10143 == FIRST_RET_REG));
10147 pcum->arg_count [(int) SH_ARG_INT] = 0;
10148 pcum->prototype_p = FALSE;
10149 if (mode != VOIDmode)
10151 pcum->call_cookie =
10152 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10153 && GET_MODE_SIZE (mode) > 4
10154 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10156 /* If the default ABI is the Renesas ABI then all library
10157 calls must assume that the library will be using the
10158 Renesas ABI. So if the function would return its result
10159 in memory then we must force the address of this memory
10160 block onto the stack. Ideally we would like to call
10161 targetm.calls.return_in_memory() here but we do not have
10162 the TYPE or the FNDECL available so we synthesize the
10163 contents of that function as best we can. */
10165 (TARGET_DEFAULT & MASK_HITACHI)
10166 && (mode == BLKmode
10167 || (GET_MODE_SIZE (mode) > 4
10168 && !(mode == DFmode
10169 && TARGET_FPU_DOUBLE)));
10173 pcum->call_cookie = 0;
10174 pcum->force_mem = FALSE;
10179 /* Determine if two hard register sets intersect.
10180 Return 1 if they do. */
10183 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10186 COPY_HARD_REG_SET (c, *a);
10187 AND_HARD_REG_SET (c, *b);
10188 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10194 #ifdef TARGET_ADJUST_UNROLL_MAX
10196 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10197 int max_unrolled_insns, int strength_reduce_p,
10200 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10201 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10203 /* Throttle back loop unrolling so that the costs of using more
10204 targets than the eight target registers we have don't outweigh
10205 the benefits of unrolling. */
10207 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10208 int n_barriers = 0;
10213 int unroll_benefit = 0, mem_latency = 0;
10214 int base_cost, best_cost, cost;
10215 int factor, best_factor;
10217 unsigned max_iterations = 32767;
10219 int need_precond = 0, precond = 0;
10220 basic_block * bbs = get_loop_body (loop);
10221 struct niter_desc *desc;
10223 /* Assume that all labels inside the loop are used from inside the
10224 loop. If the loop has multiple entry points, it is unlikely to
10225 be unrolled anyway.
10226 Also assume that all calls are to different functions. That is
10227 somewhat pessimistic, but if you have lots of calls, unrolling the
10228 loop is not likely to gain you much in the first place. */
10229 i = loop->num_nodes - 1;
10230 for (insn = BB_HEAD (bbs[i]); ; )
10232 if (GET_CODE (insn) == CODE_LABEL)
10234 else if (GET_CODE (insn) == CALL_INSN)
10236 else if (GET_CODE (insn) == NOTE
10237 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10239 else if (GET_CODE (insn) == BARRIER)
10241 if (insn != BB_END (bbs[i]))
10242 insn = NEXT_INSN (insn);
10244 insn = BB_HEAD (bbs[i]);
10249 /* One label for the loop top is normal, and it won't be duplicated by
10250 the unroller. */
10251 if (n_labels <= 1)
10252 return max_unrolled_insns;
10253 if (n_inner_loops > 0)
10255 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10256 dest = LABEL_NEXTREF (dest))
10258 for (i = n_exit_dest - 1;
10259 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10261 exit_dest[n_exit_dest++] = dest;
10263 /* If the loop top and call and exit destinations are enough to fill up
10264 the target registers, we're unlikely to do any more damage by
10265 unrolling. */
10266 if (n_calls + n_exit_dest >= 7)
10267 return max_unrolled_insns;
10269 /* ??? In the new loop unroller, there is no longer any strength
10270 reduction information available. Thus, when it comes to unrolling,
10271 we know the cost of everything, but we know the value of nothing. */
10273 if (strength_reduce_p
10274 && (unroll_type == LPT_UNROLL_RUNTIME
10275 || unroll_type == LPT_UNROLL_CONSTANT
10276 || unroll_type == LPT_PEEL_COMPLETELY))
10278 struct loop_ivs *ivs = LOOP_IVS (loop);
10279 struct iv_class *bl;
10281 /* We'll save one compare-and-branch in each loop body copy
10282 but the last one. */
10283 unroll_benefit = 1;
10284 /* Assess the benefit of removing biv & giv updates. */
10285 for (bl = ivs->list; bl; bl = bl->next)
10287 rtx increment = biv_total_increment (bl);
10288 struct induction *v;
10290 if (increment && GET_CODE (increment) == CONST_INT)
10293 for (v = bl->giv; v; v = v->next_iv)
10295 if (! v->ignore && v->same == 0
10296 && GET_CODE (v->mult_val) == CONST_INT)
10298 /* If this giv uses an array, try to determine
10299 a maximum iteration count from the size of the
10300 array. This need not be correct all the time,
10301 but should not be too far off the mark too often. */
10302 while (v->giv_type == DEST_ADDR)
10304 rtx mem = PATTERN (v->insn);
10305 tree mem_expr, type, size_tree;
10307 if (GET_CODE (SET_SRC (mem)) == MEM)
10308 mem = SET_SRC (mem);
10309 else if (GET_CODE (SET_DEST (mem)) == MEM)
10310 mem = SET_DEST (mem);
10313 mem_expr = MEM_EXPR (mem);
10316 type = TREE_TYPE (mem_expr);
10317 if (TREE_CODE (type) != ARRAY_TYPE
10318 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10320 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10323 TYPE_SIZE_UNIT (type));
10324 if (TREE_CODE (size_tree) == INTEGER_CST
10325 && ! TREE_INT_CST_HIGH (size_tree)
10326 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10327 max_iterations = TREE_INT_CST_LOW (size_tree);
10335 /* Assume there is at least some benefit. */
10336 unroll_benefit = 1;
10339 desc = get_simple_loop_desc (loop);
10340 n_iterations = desc->const_iter ? desc->niter : 0;
10342 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10344 if (! strength_reduce_p || ! n_iterations)
10346 if (! n_iterations)
10349 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10350 if (! n_iterations)
10353 #if 0 /* ??? See above - missing induction variable information. */
10354 while (unroll_benefit > 1) /* no loop */
10356 /* We include the benefit of biv/ giv updates. Check if some or
10357 all of these updates are likely to fit into a scheduling
10359 We check for the following case:
10360 - All the insns leading to the first JUMP_INSN are in a strict
10362 - there is at least one memory reference in them.
10364 When we find such a pattern, we assume that we can hide as many
10365 updates as the total of the load latency is, if we have an
10366 unroll factor of at least two. We might or might not also do
10367 this without unrolling, so rather than considering this as an
10368 extra unroll benefit, discount it in the unroll benefits of unroll
10369 factors higher than two. */
10373 insn = next_active_insn (loop->start);
10374 last_set = single_set (insn);
10377 if (GET_CODE (SET_SRC (last_set)) == MEM)
10379 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10381 if (! INSN_P (insn))
10383 if (GET_CODE (insn) == JUMP_INSN)
10385 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10387 /* Check if this is a to-be-reduced giv insn. */
10388 struct loop_ivs *ivs = LOOP_IVS (loop);
10389 struct iv_class *bl;
10390 struct induction *v;
10391 for (bl = ivs->list; bl; bl = bl->next)
10393 if (bl->biv->insn == insn)
10395 for (v = bl->giv; v; v = v->next_iv)
10396 if (v->insn == insn)
10404 set = single_set (insn);
10407 if (GET_CODE (SET_SRC (set)) == MEM)
10411 if (mem_latency < 0)
10413 else if (mem_latency > unroll_benefit - 1)
10414 mem_latency = unroll_benefit - 1;
10418 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10420 return max_unrolled_insns;
10422 n_dest = n_labels + n_calls + n_exit_dest;
10423 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10426 if (n_barriers * 2 > n_labels - 1)
10427 n_barriers = (n_labels - 1) / 2;
10428 for (factor = 2; factor <= 8; factor++)
10430 /* Bump up preconditioning cost for each power of two. */
10431 if (! (factor & (factor-1)))
10433 /* When preconditioning, only powers of two will be considered. */
10434 else if (need_precond)
10436 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10437 + (n_labels - 1) * factor + n_calls + n_exit_dest
10438 - (n_barriers * factor >> 1)
10441 = ((n_dest <= 8 ? 0 : n_dest - 7)
10442 - base_cost * factor
10443 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10444 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10445 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10448 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10449 if (cost < best_cost)
10452 best_factor = factor;
10455 threshold = best_factor * insn_count;
10456 if (max_unrolled_insns > threshold)
10457 max_unrolled_insns = threshold;
10459 return max_unrolled_insns;
10461 #endif /* TARGET_ADJUST_UNROLL_MAX */
10463 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10464 not enter into CONST_DOUBLE for the replace.
10466 Note that copying is not done so X must not be shared unless all copies
10467 are to be modified.
10469 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10470 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10471 replacements[n*2+1] - and that we take mode changes into account.
10473 If a replacement is ambiguous, return NULL_RTX.
10475 If MODIFY is zero, don't modify any rtl in place,
10476 just return zero or nonzero for failure / success. */
10479 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10484 /* The following prevents looping when we would change a MEM in a
10485 CONST_DOUBLE into the same CONST_DOUBLE. */
10486 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10489 for (i = n_replacements - 1; i >= 0 ; i--)
10490 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10491 return replacements[i*2+1];
10493 /* Allow this function to make replacements in EXPR_LISTs. */
10497 if (GET_CODE (x) == SUBREG)
10499 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10500 n_replacements, modify);
10502 if (GET_CODE (new) == CONST_INT)
10504 x = simplify_subreg (GET_MODE (x), new,
10505 GET_MODE (SUBREG_REG (x)),
10511 SUBREG_REG (x) = new;
10515 else if (GET_CODE (x) == REG)
10517 unsigned regno = REGNO (x);
10518 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10519 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10520 rtx result = NULL_RTX;
10522 for (i = n_replacements - 1; i >= 0; i--)
10524 rtx from = replacements[i*2];
10525 rtx to = replacements[i*2+1];
10526 unsigned from_regno, from_nregs, to_regno, new_regno;
10528 if (GET_CODE (from) != REG)
10530 from_regno = REGNO (from);
10531 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10532 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10533 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10535 if (regno < from_regno
10536 || regno + nregs > from_regno + from_nregs
10537 || GET_CODE (to) != REG
10540 to_regno = REGNO (to);
10541 if (to_regno < FIRST_PSEUDO_REGISTER)
10543 new_regno = regno + to_regno - from_regno;
10544 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10547 result = gen_rtx_REG (GET_MODE (x), new_regno);
10549 else if (GET_MODE (x) <= GET_MODE (to))
10550 result = gen_lowpart_common (GET_MODE (x), to);
10552 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10555 return result ? result : x;
10557 else if (GET_CODE (x) == ZERO_EXTEND)
10559 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10560 n_replacements, modify);
10562 if (GET_CODE (new) == CONST_INT)
10564 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10565 new, GET_MODE (XEXP (x, 0)));
10575 fmt = GET_RTX_FORMAT (GET_CODE (x));
10576 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10582 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10583 n_replacements, modify);
10589 else if (fmt[i] == 'E')
10590 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10592 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10593 n_replacements, modify);
10597 XVECEXP (x, i, j) = new;
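/* Illustrative call site (hypothetical): rewrite hard reg FROM0 to TO0
   and FROM1 to TO1 throughout X in one pass, modifying in place:

     rtx repl[4] = { from0, to0, from1, to1 };
     x = replace_n_hard_rtx (x, repl, 2, 1);

   A first pass with MODIFY == 0 can check that no replacement is
   ambiguous before any rtl is touched. */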
10605 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10607 enum rtx_code code = TRUNCATE;
10609 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10611 rtx inner = XEXP (x, 0);
10612 enum machine_mode inner_mode = GET_MODE (inner);
10614 if (inner_mode == mode)
10616 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10618 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10619 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10621 code = GET_CODE (x);
10625 return gen_rtx_fmt_e (code, mode, x);
10628 /* Called via for_each_rtx after reload, to clean up truncates of
10629 registers that span multiple actual hard registers. */
10631 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10635 if (GET_CODE (x) != TRUNCATE)
10638 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10640 enum machine_mode reg_mode = GET_MODE (reg);
10641 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10642 subreg_lowpart_offset (DImode, reg_mode));
10643 *(int*) n_changes += 1;
10649 /* Load and store depend on the highpart of the address. However,
10650 set_attr_alternative does not give well-defined results before reload,
10651 so we must look at the rtl ourselves to see if any of the feeding
10652 registers is used in a memref. */
10654 /* Called by sh_contains_memref_p via for_each_rtx. */
10656 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10658 return (GET_CODE (*loc) == MEM);
10661 /* Return nonzero iff INSN contains a MEM. */
10663 sh_contains_memref_p (rtx insn)
10665 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10668 /* FNADDR is the MEM expression from a call expander. Return an address
10669 to use in an SHmedia insn pattern. */
10671 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10675 fnaddr = XEXP (fnaddr, 0);
10676 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10677 if (flag_pic && is_sym)
10679 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10681 rtx reg = gen_reg_rtx (Pmode);
10683 /* We must not use GOTPLT for sibcalls, because PIC_REG
10684 must be restored before the PLT code gets to run. */
10686 emit_insn (gen_symGOT2reg (reg, fnaddr));
10688 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10693 fnaddr = gen_sym2PIC (fnaddr);
10694 PUT_MODE (fnaddr, Pmode);
10697 /* If ptabs might trap, make this visible to the rest of the compiler.
10698 We generally assume that symbols pertain to valid locations, but
10699 it is possible to generate invalid symbols with asm or linker tricks.
10700 In a list of functions where each returns its successor, an invalid
10701 symbol might denote an empty list. */
10702 if (!TARGET_PT_FIXED
10703 && (!is_sym || TARGET_INVALID_SYMBOLS)
10704 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10706 rtx tr = gen_reg_rtx (PDImode);
10708 emit_insn (gen_ptabs (tr, fnaddr));
10711 else if (! target_reg_operand (fnaddr, Pmode))
10712 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10717 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10718 enum machine_mode mode, secondary_reload_info *sri)
10722 if (REGCLASS_HAS_FP_REG (class)
10723 && ! TARGET_SHMEDIA
10724 && immediate_operand ((x), mode)
10725 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10726 && mode == SFmode && fldi_ok ()))
10730 sri->icode = CODE_FOR_reload_insf__frn;
10733 sri->icode = CODE_FOR_reload_indf__frn;
10736 /* ??? If we knew that we are in the appropriate mode -
10737 single precision - we could use a reload pattern directly. */
10742 if (class == FPUL_REGS
10743 && ((GET_CODE (x) == REG
10744 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10745 || REGNO (x) == T_REG))
10746 || GET_CODE (x) == PLUS))
10747 return GENERAL_REGS;
10748 if (class == FPUL_REGS && immediate_operand (x, mode))
10750 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10751 return GENERAL_REGS;
10752 sri->icode = CODE_FOR_reload_insi__i_fpul;
10755 if (class == FPSCR_REGS
10756 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10757 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10758 return GENERAL_REGS;
10759 if (REGCLASS_HAS_FP_REG (class)
10761 && immediate_operand (x, mode)
10762 && x != CONST0_RTX (GET_MODE (x))
10763 && GET_MODE (x) != V4SFmode)
10764 return GENERAL_REGS;
10765 if ((mode == QImode || mode == HImode)
10766 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10768 sri->icode = ((mode == QImode)
10769 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10772 if (TARGET_SHMEDIA && class == GENERAL_REGS
10773 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10774 return TARGET_REGS;
10775 } /* end of input-only processing. */
10777 if (((REGCLASS_HAS_FP_REG (class)
10778 && (GET_CODE (x) == REG
10779 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10780 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10781 && TARGET_FMOVD))))
10782 || (REGCLASS_HAS_GENERAL_REG (class)
10783 && GET_CODE (x) == REG
10784 && FP_REGISTER_P (REGNO (x))))
10785 && ! TARGET_SHMEDIA
10786 && (mode == SFmode || mode == SImode))
10788 if ((class == FPUL_REGS
10789 || (REGCLASS_HAS_FP_REG (class)
10790 && ! TARGET_SHMEDIA && mode == SImode))
10791 && (GET_CODE (x) == MEM
10792 || (GET_CODE (x) == REG
10793 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10794 || REGNO (x) == T_REG
10795 || system_reg_operand (x, VOIDmode)))))
10797 if (class == FPUL_REGS)
10798 return GENERAL_REGS;
10801 if ((class == TARGET_REGS
10802 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10803 && !EXTRA_CONSTRAINT_Csy (x)
10804 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10805 return GENERAL_REGS;
10806 if ((class == MAC_REGS || class == PR_REGS)
10807 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10808 && class != REGNO_REG_CLASS (REGNO (x)))
10809 return GENERAL_REGS;
10810 if (class != GENERAL_REGS && GET_CODE (x) == REG
10811 && TARGET_REGISTER_P (REGNO (x)))
10812 return GENERAL_REGS;
10816 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10818 /* This defines the storage for the variable part of a -mboard= option.
10819 It is only required when using the sh-superh-elf target. */
10821 const char * boardtype = "7750p2";
10822 const char * osruntime = "bare";