1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 tree sh_deferred_function_attributes;
74 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
76 /* Global variables for machine-dependent things. */
78 /* Which cpu are we scheduling for. */
79 enum processor_type sh_cpu;
81 /* Definitions used in ready queue reordering for first scheduling pass. */
83 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
84 static short *regmode_weight[2];
86 /* Total SFmode and SImode weights of scheduled insns. */
87 static int curr_regmode_pressure[2];
89 /* If true, skip cycles for Q -> R movement. */
90 static int skip_cycles = 0;
92 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
93 and returned from sh_reorder2. */
94 static short cached_can_issue_more;
96 /* Saved operands from the last compare to use when we generate an scc
102 /* Provides the class number of the smallest class containing
105 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
107 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
108 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
124 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
144 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
145 GENERAL_REGS, GENERAL_REGS,
148 char sh_register_names[FIRST_PSEUDO_REGISTER] \
149 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
151 char sh_additional_register_names[ADDREGNAMES_SIZE] \
152 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
153 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
155 /* Provide reg_class from a letter such as appears in the machine
156 description. *: target independently reserved letter.
157 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
159 enum reg_class reg_class_from_letter[] =
161 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
162 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
163 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
164 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
165 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
166 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
167 /* y */ FPUL_REGS, /* z */ R0_REGS
170 int assembler_dialect;
172 static bool shmedia_space_reserved_for_target_registers;
174 static bool sh_handle_option (size_t, const char *, int);
175 static void split_branches (rtx);
176 static int branch_dest (rtx);
177 static void force_into (rtx, rtx);
178 static void print_slot (rtx);
179 static rtx add_constant (rtx, enum machine_mode, rtx);
180 static void dump_table (rtx, rtx);
181 static int hi_const (rtx);
182 static int broken_move (rtx);
183 static int mova_p (rtx);
184 static rtx find_barrier (int, rtx, rtx);
185 static int noncall_uses_reg (rtx, rtx, rtx *);
186 static rtx gen_block_redirect (rtx, int, int);
187 static void sh_reorg (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static void mark_use (rtx, rtx *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static rtx mark_constant_pool_use (rtx);
197 const struct attribute_spec sh_attribute_table[];
198 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
203 static void sh_insert_attributes (tree, tree *);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx, rtx, rtx, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
208 static short find_set_regmode_weight (rtx, enum machine_mode);
209 static short find_insn_regmode_weight (rtx, enum machine_mode);
210 static void find_regmode_weight (int, enum machine_mode);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static int sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (tree);
229 static void sh_init_builtins (void);
230 static void sh_media_init_builtins (void);
231 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
232 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
233 static void sh_file_start (void);
234 static int flow_dependent_p (rtx, rtx);
235 static void flow_dependent_p_1 (rtx, rtx, void *);
236 static int shiftcosts (rtx);
237 static int andcosts (rtx);
238 static int addsubcosts (rtx);
239 static int multcosts (rtx);
240 static bool unspec_caller_rtx_p (rtx);
241 static bool sh_cannot_copy_insn_p (rtx);
242 static bool sh_rtx_costs (rtx, int, int, int *);
243 static int sh_address_cost (rtx);
244 #ifdef TARGET_ADJUST_UNROLL_MAX
245 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
247 static int sh_pr_n_sets (void);
248 static rtx sh_allocate_initial_value (rtx);
249 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
250 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
251 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
252 static int scavenge_reg (HARD_REG_SET *s);
253 struct save_schedule_s;
254 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
255 struct save_schedule_s *, int);
257 static rtx sh_struct_value_rtx (tree, int);
258 static bool sh_return_in_memory (tree, tree);
259 static rtx sh_builtin_saveregs (void);
260 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
261 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
262 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
263 static tree sh_build_builtin_va_list (void);
264 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
271 static int sh_dwarf_calling_convention (tree);
272 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
279 /* The next two are used for debug info when compiling with -gdwarf. */
280 #undef TARGET_ASM_UNALIGNED_HI_OP
281 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
282 #undef TARGET_ASM_UNALIGNED_SI_OP
283 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
285 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
286 #undef TARGET_ASM_UNALIGNED_DI_OP
287 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
288 #undef TARGET_ASM_ALIGNED_DI_OP
289 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
294 #undef TARGET_ASM_OUTPUT_MI_THUNK
295 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
300 #undef TARGET_ASM_FILE_START
301 #define TARGET_ASM_FILE_START sh_file_start
302 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
303 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION sh_handle_option
310 #undef TARGET_INSERT_ATTRIBUTES
311 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
313 #undef TARGET_SCHED_ADJUST_COST
314 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
319 /* The next 5 hooks have been implemented for reenabling sched1. With the
320 help of these macros we are limiting the movement of insns in sched1 to
321 reduce the register pressure. The overall idea is to keep count of SImode
322 and SFmode regs required by already scheduled insns. When these counts
323 cross some threshold values; give priority to insns that free registers.
324 The insn that frees registers is most likely to be the insn with lowest
325 LUID (original insn order); but such an insn might be there in the stalled
326 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
327 upto a max of 8 cycles so that such insns may move from Q -> R.
329 The description of the hooks are as below:
331 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
332 scheduler; it is called inside the sched_init function just after
333 find_insn_reg_weights function call. It is used to calculate the SImode
334 and SFmode weights of insns of basic blocks; much similar to what
335 find_insn_reg_weights does.
336 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
338 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
339 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
342 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
343 high; reorder the ready queue so that the insn with lowest LUID will be
346 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
347 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
349 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
350 can be returned from TARGET_SCHED_REORDER2.
352 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
354 #undef TARGET_SCHED_DFA_NEW_CYCLE
355 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
357 #undef TARGET_SCHED_INIT_GLOBAL
358 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
360 #undef TARGET_SCHED_FINISH_GLOBAL
361 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
366 #undef TARGET_SCHED_REORDER
367 #define TARGET_SCHED_REORDER sh_reorder
369 #undef TARGET_SCHED_REORDER2
370 #define TARGET_SCHED_REORDER2 sh_reorder2
372 #undef TARGET_SCHED_INIT
373 #define TARGET_SCHED_INIT sh_md_init
375 #undef TARGET_CANNOT_MODIFY_JUMPS_P
376 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
377 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
378 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
379 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
380 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
381 sh_optimize_target_register_callee_saved
383 #undef TARGET_MS_BITFIELD_LAYOUT_P
384 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
386 #undef TARGET_INIT_BUILTINS
387 #define TARGET_INIT_BUILTINS sh_init_builtins
388 #undef TARGET_EXPAND_BUILTIN
389 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
394 #undef TARGET_CANNOT_COPY_INSN_P
395 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS sh_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST sh_address_cost
400 #undef TARGET_ALLOCATE_INITIAL_VALUE
401 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
403 #undef TARGET_MACHINE_DEPENDENT_REORG
404 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
407 #undef TARGET_HAVE_TLS
408 #define TARGET_HAVE_TLS true
411 #undef TARGET_PROMOTE_PROTOTYPES
412 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
413 #undef TARGET_PROMOTE_FUNCTION_ARGS
414 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_RETURN
416 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
418 #undef TARGET_STRUCT_VALUE_RTX
419 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
423 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
424 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
427 #undef TARGET_STRICT_ARGUMENT_NAMING
428 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
429 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
430 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
431 #undef TARGET_MUST_PASS_IN_STACK
432 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
433 #undef TARGET_PASS_BY_REFERENCE
434 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
435 #undef TARGET_CALLEE_COPIES
436 #define TARGET_CALLEE_COPIES sh_callee_copies
437 #undef TARGET_ARG_PARTIAL_BYTES
438 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
440 #undef TARGET_BUILD_BUILTIN_VA_LIST
441 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
442 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
443 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
446 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
448 #undef TARGET_CHECK_PCH_TARGET_FLAGS
449 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
451 #undef TARGET_DWARF_CALLING_CONVENTION
452 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
454 /* Return regmode weight for insn. */
455 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
457 /* Return current register pressure for regmode. */
458 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
462 #undef TARGET_ENCODE_SECTION_INFO
463 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
464 #undef TARGET_STRIP_NAME_ENCODING
465 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
466 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
467 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
471 #ifdef TARGET_ADJUST_UNROLL_MAX
472 #undef TARGET_ADJUST_UNROLL_MAX
473 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
476 #undef TARGET_SECONDARY_RELOAD
477 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Implement TARGET_HANDLE_OPTION. */
484 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
485 int value ATTRIBUTE_UNUSED)
490 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
494 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
498 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
502 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
506 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
509 case OPT_m2a_single_only:
510 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
537 case OPT_m4_single_only:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
554 case OPT_m4a_single_only:
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
562 case OPT_m5_32media_nofpu:
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
570 case OPT_m5_64media_nofpu:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
578 case OPT_m5_compact_nofpu:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
587 /* Print the operand address in x to the stream. */
590 print_operand_address (FILE *stream, rtx x)
592 switch (GET_CODE (x))
596 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
601 rtx base = XEXP (x, 0);
602 rtx index = XEXP (x, 1);
604 switch (GET_CODE (index))
607 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
608 reg_names[true_regnum (base)]);
614 int base_num = true_regnum (base);
615 int index_num = true_regnum (index);
617 fprintf (stream, "@(r0,%s)",
618 reg_names[MAX (base_num, index_num)]);
629 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
633 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
637 x = mark_constant_pool_use (x);
638 output_addr_const (stream, x);
643 /* Print operand x (an rtx) in assembler syntax to file stream
644 according to modifier code.
646 '.' print a .s if insn needs delay slot
647 ',' print LOCAL_LABEL_PREFIX
648 '@' print trap, rte or rts depending upon pragma interruptness
649 '#' output a nop if there is nothing to put in the delay slot
650 ''' print likelihood suffix (/u for unlikely).
651 '>' print branch target if -fverbose-asm
652 'O' print a constant without the #
653 'R' print the LSW of a dp value - changes if in little endian
654 'S' print the MSW of a dp value - changes if in little endian
655 'T' print the next word of a dp value - same as 'R' in big endian mode.
656 'M' print an `x' if `m' will print `base,index'.
657 'N' print 'r63' if the operand is (const_int 0).
658 'd' print a V2SF reg as dN instead of fpN.
659 'm' print a pair `base,offset' or `base,index', for LD and ST.
660 'U' Likewise for {LD,ST}{HI,LO}.
661 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
662 'o' output an operator. */
665 print_operand (FILE *stream, rtx x, int code)
668 enum machine_mode mode;
676 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
677 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
678 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
681 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
684 trapa_attr = lookup_attribute ("trap_exit",
685 DECL_ATTRIBUTES (current_function_decl));
687 fprintf (stream, "trapa #%ld",
688 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
689 else if (sh_cfun_interrupt_handler_p ())
690 fprintf (stream, "rte");
692 fprintf (stream, "rts");
695 /* Output a nop if there's nothing in the delay slot. */
696 if (dbr_sequence_length () == 0)
697 fprintf (stream, "\n\tnop");
701 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
703 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
704 fputs ("/u", stream);
708 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
710 fputs ("\t! target: ", stream);
711 output_addr_const (stream, JUMP_LABEL (current_output_insn));
715 x = mark_constant_pool_use (x);
716 output_addr_const (stream, x);
718 /* N.B.: %R / %S / %T adjust memory addresses by four.
719 For SHMEDIA, that means they can be used to access the first and
720 second 32 bit part of a 64 bit (or larger) value that
721 might be held in floating point registers or memory.
722 While they can be used to access 64 bit parts of a larger value
723 held in general purpose registers, that won't work with memory -
724 neither for fp registers, since the frxx names are used. */
726 if (REG_P (x) || GET_CODE (x) == SUBREG)
728 regno = true_regnum (x);
729 regno += FP_REGISTER_P (regno) ? 1 : LSW;
730 fputs (reg_names[regno], (stream));
734 x = adjust_address (x, SImode, 4 * LSW);
735 print_operand_address (stream, XEXP (x, 0));
742 if (mode == VOIDmode)
744 if (GET_MODE_SIZE (mode) >= 8)
745 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
747 print_operand (stream, sub, 0);
749 output_operand_lossage ("invalid operand to %%R");
753 if (REG_P (x) || GET_CODE (x) == SUBREG)
755 regno = true_regnum (x);
756 regno += FP_REGISTER_P (regno) ? 0 : MSW;
757 fputs (reg_names[regno], (stream));
761 x = adjust_address (x, SImode, 4 * MSW);
762 print_operand_address (stream, XEXP (x, 0));
769 if (mode == VOIDmode)
771 if (GET_MODE_SIZE (mode) >= 8)
772 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
774 print_operand (stream, sub, 0);
776 output_operand_lossage ("invalid operand to %%S");
780 /* Next word of a double. */
781 switch (GET_CODE (x))
784 fputs (reg_names[REGNO (x) + 1], (stream));
787 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
788 && GET_CODE (XEXP (x, 0)) != POST_INC)
789 x = adjust_address (x, SImode, 4);
790 print_operand_address (stream, XEXP (x, 0));
797 switch (GET_CODE (x))
799 case PLUS: fputs ("add", stream); break;
800 case MINUS: fputs ("sub", stream); break;
801 case MULT: fputs ("mul", stream); break;
802 case DIV: fputs ("div", stream); break;
803 case EQ: fputs ("eq", stream); break;
804 case NE: fputs ("ne", stream); break;
805 case GT: case LT: fputs ("gt", stream); break;
806 case GE: case LE: fputs ("ge", stream); break;
807 case GTU: case LTU: fputs ("gtu", stream); break;
808 case GEU: case LEU: fputs ("geu", stream); break;
814 if (GET_CODE (x) == MEM
815 && GET_CODE (XEXP (x, 0)) == PLUS
816 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
817 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
822 gcc_assert (GET_CODE (x) == MEM);
826 switch (GET_CODE (x))
830 print_operand (stream, x, 0);
831 fputs (", 0", stream);
835 print_operand (stream, XEXP (x, 0), 0);
836 fputs (", ", stream);
837 print_operand (stream, XEXP (x, 1), 0);
846 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
848 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
852 if (x == CONST0_RTX (GET_MODE (x)))
854 fprintf ((stream), "r63");
859 if (GET_CODE (x) == CONST_INT)
861 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
871 switch (GET_CODE (x))
875 rtx inner = XEXP (x, 0);
877 enum machine_mode inner_mode;
879 /* We might see SUBREGs with vector mode registers inside. */
880 if (GET_CODE (inner) == SUBREG
881 && (GET_MODE_SIZE (GET_MODE (inner))
882 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
883 && subreg_lowpart_p (inner))
884 inner = SUBREG_REG (inner);
885 if (GET_CODE (inner) == CONST_INT)
887 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
890 inner_mode = GET_MODE (inner);
891 if (GET_CODE (inner) == SUBREG
892 && (GET_MODE_SIZE (GET_MODE (inner))
893 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
894 && GET_CODE (SUBREG_REG (inner)) == REG)
896 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
897 GET_MODE (SUBREG_REG (inner)),
900 inner = SUBREG_REG (inner);
902 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
904 /* Floating point register pairs are always big endian;
905 general purpose registers are 64 bit wide. */
906 regno = REGNO (inner);
907 regno = (HARD_REGNO_NREGS (regno, inner_mode)
908 - HARD_REGNO_NREGS (regno, mode))
916 /* FIXME: We need this on SHmedia32 because reload generates
917 some sign-extended HI or QI loads into DImode registers
918 but, because Pmode is SImode, the address ends up with a
919 subreg:SI of the DImode register. Maybe reload should be
920 fixed so as to apply alter_subreg to such loads? */
922 gcc_assert (trapping_target_operand (x, VOIDmode));
923 x = XEXP (XEXP (x, 2), 0);
926 gcc_assert (SUBREG_BYTE (x) == 0
927 && GET_CODE (SUBREG_REG (x)) == REG);
935 if (FP_REGISTER_P (regno)
936 && mode == V16SFmode)
937 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
938 else if (FP_REGISTER_P (REGNO (x))
940 fprintf ((stream), "fv%s", reg_names[regno] + 2);
941 else if (GET_CODE (x) == REG
943 fprintf ((stream), "fp%s", reg_names[regno] + 2);
944 else if (FP_REGISTER_P (REGNO (x))
945 && GET_MODE_SIZE (mode) > 4)
946 fprintf ((stream), "d%s", reg_names[regno] + 1);
948 fputs (reg_names[regno], (stream));
952 output_address (XEXP (x, 0));
957 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
958 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
959 && (GET_MODE (XEXP (x, 0)) == DImode
960 || GET_MODE (XEXP (x, 0)) == SImode)
961 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
962 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
964 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
966 bool nested_expr = false;
969 if (GET_CODE (val) == ASHIFTRT)
972 val2 = XEXP (val, 0);
974 if (GET_CODE (val2) == CONST
975 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
980 output_addr_const (stream, val2);
983 if (GET_CODE (val) == ASHIFTRT)
985 fputs (" >> ", stream);
986 output_addr_const (stream, XEXP (val, 1));
989 fputs (" & 65535)", stream);
997 output_addr_const (stream, x);
1004 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1006 force_into (rtx value, rtx target)
1008 value = force_operand (value, target);
1009 if (! rtx_equal_p (value, target))
1010 emit_insn (gen_move_insn (target, value));
1013 /* Emit code to perform a block move. Choose the best method.
1015 OPERANDS[0] is the destination.
1016 OPERANDS[1] is the source.
1017 OPERANDS[2] is the size.
1018 OPERANDS[3] is the alignment safe to use. */
1021 expand_block_move (rtx *operands)
1023 int align = INTVAL (operands[3]);
1024 int constp = (GET_CODE (operands[2]) == CONST_INT);
1025 int bytes = (constp ? INTVAL (operands[2]) : 0);
1030 /* If we could use mov.l to move words and dest is word-aligned, we
1031 can use movua.l for loads and still generate a relatively short
1032 and efficient sequence. */
1033 if (TARGET_SH4A_ARCH && align < 4
1034 && MEM_ALIGN (operands[0]) >= 32
1035 && can_move_by_pieces (bytes, 32))
1037 rtx dest = copy_rtx (operands[0]);
1038 rtx src = copy_rtx (operands[1]);
1039 /* We could use different pseudos for each copied word, but
1040 since movua can only load into r0, it's kind of
1042 rtx temp = gen_reg_rtx (SImode);
1043 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1046 while (copied + 4 <= bytes)
1048 rtx to = adjust_address (dest, SImode, copied);
1049 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1051 emit_insn (gen_movua (temp, from));
1052 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1053 emit_move_insn (to, temp);
1058 move_by_pieces (adjust_address (dest, BLKmode, copied),
1059 adjust_automodify_address (src, BLKmode,
1061 bytes - copied, align, 0);
1066 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1067 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1068 if (align < 4 || (bytes % 4 != 0))
1071 if (TARGET_HARD_SH4)
1075 else if (bytes == 12)
1077 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1078 rtx r4 = gen_rtx_REG (SImode, 4);
1079 rtx r5 = gen_rtx_REG (SImode, 5);
1081 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1082 force_into (XEXP (operands[0], 0), r4);
1083 force_into (XEXP (operands[1], 0), r5);
1084 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1087 else if (! TARGET_SMALLCODE)
1089 const char *entry_name;
1090 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1092 rtx r4 = gen_rtx_REG (SImode, 4);
1093 rtx r5 = gen_rtx_REG (SImode, 5);
1094 rtx r6 = gen_rtx_REG (SImode, 6);
1096 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1097 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1098 force_into (XEXP (operands[0], 0), r4);
1099 force_into (XEXP (operands[1], 0), r5);
1101 dwords = bytes >> 3;
1102 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1103 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1112 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1113 rtx r4 = gen_rtx_REG (SImode, 4);
1114 rtx r5 = gen_rtx_REG (SImode, 5);
1116 sprintf (entry, "__movmemSI%d", bytes);
1117 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1118 force_into (XEXP (operands[0], 0), r4);
1119 force_into (XEXP (operands[1], 0), r5);
1120 emit_insn (gen_block_move_real (func_addr_rtx));
1124 /* This is the same number of bytes as a memcpy call, but to a different
1125 less common function name, so this will occasionally use more space. */
1126 if (! TARGET_SMALLCODE)
1128 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1129 int final_switch, while_loop;
1130 rtx r4 = gen_rtx_REG (SImode, 4);
1131 rtx r5 = gen_rtx_REG (SImode, 5);
1132 rtx r6 = gen_rtx_REG (SImode, 6);
1134 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1135 force_into (XEXP (operands[0], 0), r4);
1136 force_into (XEXP (operands[1], 0), r5);
1138 /* r6 controls the size of the move. 16 is decremented from it
1139 for each 64 bytes moved. Then the negative bit left over is used
1140 as an index into a list of move instructions. e.g., a 72 byte move
1141 would be set up with size(r6) = 14, for one iteration through the
1142 big while loop, and a switch of -2 for the last part. */
1144 final_switch = 16 - ((bytes / 4) % 16);
1145 while_loop = ((bytes / 4) / 16 - 1) * 16;
1146 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1147 emit_insn (gen_block_lump_real (func_addr_rtx));
1154 /* Prepare operands for a move define_expand; specifically, one of the
1155 operands must be in a register. */
1158 prepare_move_operands (rtx operands[], enum machine_mode mode)
1160 if ((mode == SImode || mode == DImode)
1162 && ! ((mode == Pmode || mode == ptr_mode)
1163 && tls_symbolic_operand (operands[1], Pmode) != 0))
1166 if (SYMBOLIC_CONST_P (operands[1]))
1168 if (GET_CODE (operands[0]) == MEM)
1169 operands[1] = force_reg (Pmode, operands[1]);
1170 else if (TARGET_SHMEDIA
1171 && GET_CODE (operands[1]) == LABEL_REF
1172 && target_reg_operand (operands[0], mode))
1176 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1177 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1180 else if (GET_CODE (operands[1]) == CONST
1181 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1182 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1184 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1185 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1187 operands[1] = expand_binop (mode, add_optab, temp,
1188 XEXP (XEXP (operands[1], 0), 1),
1189 no_new_pseudos ? temp
1190 : gen_reg_rtx (Pmode),
1191 0, OPTAB_LIB_WIDEN);
1195 if (! reload_in_progress && ! reload_completed)
1197 /* Copy the source to a register if both operands aren't registers. */
1198 if (! register_operand (operands[0], mode)
1199 && ! sh_register_operand (operands[1], mode))
1200 operands[1] = copy_to_mode_reg (mode, operands[1]);
1202 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1204 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1205 except that we can't use that function because it is static. */
1206 rtx new = change_address (operands[0], mode, 0);
1207 MEM_COPY_ATTRIBUTES (new, operands[0]);
1211 /* This case can happen while generating code to move the result
1212 of a library call to the target. Reject `st r0,@(rX,rY)' because
1213 reload will fail to find a spill register for rX, since r0 is already
1214 being used for the source. */
1216 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1217 && GET_CODE (operands[0]) == MEM
1218 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1219 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1220 operands[1] = copy_to_mode_reg (mode, operands[1]);
1223 if (mode == Pmode || mode == ptr_mode)
1226 enum tls_model tls_kind;
1230 if (GET_CODE (op1) == CONST
1231 && GET_CODE (XEXP (op1, 0)) == PLUS
1232 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1234 opc = XEXP (XEXP (op1, 0), 1);
1235 op1 = XEXP (XEXP (op1, 0), 0);
1240 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1242 rtx tga_op1, tga_ret, tmp, tmp2;
1246 case TLS_MODEL_GLOBAL_DYNAMIC:
1247 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1248 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1252 case TLS_MODEL_LOCAL_DYNAMIC:
1253 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1254 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1256 tmp = gen_reg_rtx (Pmode);
1257 emit_move_insn (tmp, tga_ret);
1259 if (register_operand (op0, Pmode))
1262 tmp2 = gen_reg_rtx (Pmode);
1264 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1268 case TLS_MODEL_INITIAL_EXEC:
1271 /* Don't schedule insns for getting GOT address when
1272 the first scheduling is enabled, to avoid spill
1274 if (flag_schedule_insns)
1275 emit_insn (gen_blockage ());
1276 emit_insn (gen_GOTaddr2picreg ());
1277 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1279 if (flag_schedule_insns)
1280 emit_insn (gen_blockage ());
1282 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1283 tmp = gen_sym2GOTTPOFF (op1);
1284 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1288 case TLS_MODEL_LOCAL_EXEC:
1289 tmp2 = gen_reg_rtx (Pmode);
1290 emit_insn (gen_load_gbr (tmp2));
1291 tmp = gen_reg_rtx (Pmode);
1292 emit_insn (gen_symTPOFF2reg (tmp, op1));
1294 if (register_operand (op0, Pmode))
1297 op1 = gen_reg_rtx (Pmode);
1299 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1306 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1314 /* Prepare the operands for an scc instruction; make sure that the
1315 compare has been done. */
1317 prepare_scc_operands (enum rtx_code code)
1319 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1320 enum rtx_code oldcode = code;
1321 enum machine_mode mode;
1323 /* First need a compare insn. */
1327 /* It isn't possible to handle this case. */
1344 if (code != oldcode)
1346 rtx tmp = sh_compare_op0;
1347 sh_compare_op0 = sh_compare_op1;
1348 sh_compare_op1 = tmp;
1351 mode = GET_MODE (sh_compare_op0);
1352 if (mode == VOIDmode)
1353 mode = GET_MODE (sh_compare_op1);
1355 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1356 if ((code != EQ && code != NE
1357 && (sh_compare_op1 != const0_rtx
1358 || code == GTU || code == GEU || code == LTU || code == LEU))
1359 || (mode == DImode && sh_compare_op1 != const0_rtx)
1360 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1361 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1363 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1364 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1365 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1366 gen_rtx_SET (VOIDmode, t_reg,
1367 gen_rtx_fmt_ee (code, SImode,
1368 sh_compare_op0, sh_compare_op1)),
1369 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1371 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1372 gen_rtx_fmt_ee (code, SImode,
1373 sh_compare_op0, sh_compare_op1)));
1378 /* Called from the md file, set up the operands of a compare instruction. */
1381 from_compare (rtx *operands, int code)
1383 enum machine_mode mode = GET_MODE (sh_compare_op0);
1385 if (mode == VOIDmode)
1386 mode = GET_MODE (sh_compare_op1);
1389 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1391 /* Force args into regs, since we can't use constants here. */
1392 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1393 if (sh_compare_op1 != const0_rtx
1394 || code == GTU || code == GEU
1395 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1396 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1398 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1400 from_compare (operands, GT);
1401 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1404 insn = gen_rtx_SET (VOIDmode,
1405 gen_rtx_REG (SImode, T_REG),
1406 gen_rtx_fmt_ee (code, SImode,
1407 sh_compare_op0, sh_compare_op1));
1408 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1410 insn = gen_rtx_PARALLEL (VOIDmode,
1412 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1413 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1419 /* Functions to output assembly code. */
1421 /* Return a sequence of instructions to perform DI or DF move.
1423 Since the SH cannot move a DI or DF in one instruction, we have
1424 to take care when we see overlapping source and dest registers. */
1427 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1428 enum machine_mode mode)
1430 rtx dst = operands[0];
1431 rtx src = operands[1];
1433 if (GET_CODE (dst) == MEM
1434 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1435 return "mov.l %T1,%0\n\tmov.l %1,%0";
1437 if (register_operand (dst, mode)
1438 && register_operand (src, mode))
1440 if (REGNO (src) == MACH_REG)
1441 return "sts mach,%S0\n\tsts macl,%R0";
1443 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1444 when mov.d r1,r0 do r1->r0 then r2->r1. */
1446 if (REGNO (src) + 1 == REGNO (dst))
1447 return "mov %T1,%T0\n\tmov %1,%0";
1449 return "mov %1,%0\n\tmov %T1,%T0";
1451 else if (GET_CODE (src) == CONST_INT)
1453 if (INTVAL (src) < 0)
1454 output_asm_insn ("mov #-1,%S0", operands);
1456 output_asm_insn ("mov #0,%S0", operands);
1458 return "mov %1,%R0";
1460 else if (GET_CODE (src) == MEM)
1463 int dreg = REGNO (dst);
1464 rtx inside = XEXP (src, 0);
1466 switch (GET_CODE (inside))
1469 ptrreg = REGNO (inside);
1473 ptrreg = subreg_regno (inside);
1477 ptrreg = REGNO (XEXP (inside, 0));
1478 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1479 an offsettable address. Unfortunately, offsettable addresses use
1480 QImode to check the offset, and a QImode offsettable address
1481 requires r0 for the other operand, which is not currently
1482 supported, so we can't use the 'o' constraint.
1483 Thus we must check for and handle r0+REG addresses here.
1484 We punt for now, since this is likely very rare. */
1485 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1489 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1491 return "mov.l %1,%0\n\tmov.l %1,%T0";
1496 /* Work out the safe way to copy. Copy into the second half first. */
1498 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1501 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1504 /* Print an instruction which would have gone into a delay slot after
1505 another instruction, but couldn't because the other instruction expanded
1506 into a sequence where putting the slot insn at the end wouldn't work. */
1509 print_slot (rtx insn)
1511 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1513 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1517 output_far_jump (rtx insn, rtx op)
1519 struct { rtx lab, reg, op; } this;
1520 rtx braf_base_lab = NULL_RTX;
1523 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1526 this.lab = gen_label_rtx ();
1530 && offset - get_attr_length (insn) <= 32766)
1533 jump = "mov.w %O0,%1; braf %1";
1541 jump = "mov.l %O0,%1; braf %1";
1543 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1546 jump = "mov.l %O0,%1; jmp @%1";
1548 /* If we have a scratch register available, use it. */
1549 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1550 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1552 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1553 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1554 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1555 output_asm_insn (jump, &this.lab);
1556 if (dbr_sequence_length ())
1557 print_slot (final_sequence);
1559 output_asm_insn ("nop", 0);
1563 /* Output the delay slot insn first if any. */
1564 if (dbr_sequence_length ())
1565 print_slot (final_sequence);
1567 this.reg = gen_rtx_REG (SImode, 13);
1568 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1569 Fortunately, MACL is fixed and call-clobbered, and we never
1570 need its value across jumps, so save r13 in it instead of in
1573 output_asm_insn ("lds r13, macl", 0);
1575 output_asm_insn ("mov.l r13,@-r15", 0);
1576 output_asm_insn (jump, &this.lab);
1578 output_asm_insn ("sts macl, r13", 0);
1580 output_asm_insn ("mov.l @r15+,r13", 0);
1582 if (far && flag_pic && TARGET_SH2)
1584 braf_base_lab = gen_label_rtx ();
1585 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1586 CODE_LABEL_NUMBER (braf_base_lab));
1589 output_asm_insn (".align 2", 0);
1590 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1592 if (far && flag_pic)
1595 this.lab = braf_base_lab;
1596 output_asm_insn (".long %O2-%O0", &this.lab);
1599 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1603 /* Local label counter, used for constants in the pool and inside
1604 pattern branches. */
1606 static int lf = 100;
1608 /* Output code for ordinary branches. */
1611 output_branch (int logic, rtx insn, rtx *operands)
1613 switch (get_attr_length (insn))
1616 /* This can happen if filling the delay slot has caused a forward
1617 branch to exceed its range (we could reverse it, but only
1618 when we know we won't overextend other branches; this should
1619 best be handled by relaxation).
1620 It can also happen when other condbranches hoist delay slot insn
1621 from their destination, thus leading to code size increase.
1622 But the branch will still be in the range -4092..+4098 bytes. */
1627 /* The call to print_slot will clobber the operands. */
1628 rtx op0 = operands[0];
1630 /* If the instruction in the delay slot is annulled (true), then
1631 there is no delay slot where we can put it now. The only safe
1632 place for it is after the label. final will do that by default. */
1635 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1636 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1638 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1639 ASSEMBLER_DIALECT ? "/" : ".", label);
1640 print_slot (final_sequence);
1643 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1645 output_asm_insn ("bra\t%l0", &op0);
1646 fprintf (asm_out_file, "\tnop\n");
1647 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1651 /* When relaxing, handle this like a short branch. The linker
1652 will fix it up if it still doesn't fit after relaxation. */
1654 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1656 /* These are for SH2e, in which we have to account for the
1657 extra nop because of the hardware bug in annulled branches. */
1663 gcc_assert (!final_sequence
1664 || !(INSN_ANNULLED_BRANCH_P
1665 (XVECEXP (final_sequence, 0, 0))));
1666 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1668 ASSEMBLER_DIALECT ? "/" : ".", label);
1669 fprintf (asm_out_file, "\tnop\n");
1670 output_asm_insn ("bra\t%l0", operands);
1671 fprintf (asm_out_file, "\tnop\n");
1672 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1676 /* When relaxing, fall through. */
1681 sprintf (buffer, "b%s%ss\t%%l0",
1683 ASSEMBLER_DIALECT ? "/" : ".");
1684 output_asm_insn (buffer, &operands[0]);
1689 /* There should be no longer branches now - that would
1690 indicate that something has destroyed the branches set
1691 up in machine_dependent_reorg. */
1697 output_branchy_insn (enum rtx_code code, const char *template,
1698 rtx insn, rtx *operands)
1700 rtx next_insn = NEXT_INSN (insn);
1702 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1704 rtx src = SET_SRC (PATTERN (next_insn));
1705 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1707 /* Following branch not taken */
1708 operands[9] = gen_label_rtx ();
1709 emit_label_after (operands[9], next_insn);
1710 INSN_ADDRESSES_NEW (operands[9],
1711 INSN_ADDRESSES (INSN_UID (next_insn))
1712 + get_attr_length (next_insn));
1717 int offset = (branch_dest (next_insn)
1718 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1719 if (offset >= -252 && offset <= 258)
1721 if (GET_CODE (src) == IF_THEN_ELSE)
1723 src = XEXP (src, 1);
1729 operands[9] = gen_label_rtx ();
1730 emit_label_after (operands[9], insn);
1731 INSN_ADDRESSES_NEW (operands[9],
1732 INSN_ADDRESSES (INSN_UID (insn))
1733 + get_attr_length (insn));
1738 output_ieee_ccmpeq (rtx insn, rtx *operands)
1740 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1744 /* Output the start of the assembler file. */
1747 sh_file_start (void)
1749 default_file_start ();
1752 /* Declare the .directive section before it is used. */
1753 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1754 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1758 /* We need to show the text section with the proper
1759 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1760 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1761 will complain. We can teach GAS specifically about the
1762 default attributes for our choice of text section, but
1763 then we would have to change GAS again if/when we change
1764 the text section name. */
1765 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1767 /* Switch to the data section so that the coffsem symbol
1768 isn't in the text section. */
1769 switch_to_section (data_section);
1771 if (TARGET_LITTLE_ENDIAN)
1772 fputs ("\t.little\n", asm_out_file);
1776 if (TARGET_SHCOMPACT)
1777 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1778 else if (TARGET_SHMEDIA)
1779 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1780 TARGET_SHMEDIA64 ? 64 : 32);
1784 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1787 unspec_caller_rtx_p (rtx pat)
1789 switch (GET_CODE (pat))
1792 return unspec_caller_rtx_p (XEXP (pat, 0));
1795 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1797 return unspec_caller_rtx_p (XEXP (pat, 1));
1799 if (XINT (pat, 1) == UNSPEC_CALLER)
1808 /* Indicate that INSN cannot be duplicated. This is true for insn
1809 that generates a unique label. */
1812 sh_cannot_copy_insn_p (rtx insn)
1816 if (!reload_completed || !flag_pic)
1819 if (GET_CODE (insn) != INSN)
1821 if (asm_noperands (insn) >= 0)
1824 pat = PATTERN (insn);
1825 if (GET_CODE (pat) != SET)
1827 pat = SET_SRC (pat);
1829 if (unspec_caller_rtx_p (pat))
1835 /* Actual number of instructions used to make a shift by N. */
1836 static const char ashiftrt_insns[] =
1837 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1839 /* Left shift and logical right shift are the same. */
1840 static const char shift_insns[] =
1841 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1843 /* Individual shift amounts needed to get the above length sequences.
1844 One bit right shifts clobber the T bit, so when possible, put one bit
1845 shifts in the middle of the sequence, so the ends are eligible for
1846 branch delay slots. */
1847 static const short shift_amounts[32][5] = {
1848 {0}, {1}, {2}, {2, 1},
1849 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1850 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1851 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1852 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1853 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1854 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1855 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1857 /* Likewise, but for shift amounts < 16, up to three highmost bits
1858 might be clobbered. This is typically used when combined with some
1859 kind of sign or zero extension. */
1861 static const char ext_shift_insns[] =
1862 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1864 static const short ext_shift_amounts[32][4] = {
1865 {0}, {1}, {2}, {2, 1},
1866 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1867 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1868 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1869 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1870 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1871 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1872 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1874 /* Assuming we have a value that has been sign-extended by at least one bit,
1875 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1876 to shift it by N without data loss, and quicker than by other means? */
1877 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1879 /* This is used in length attributes in sh.md to help compute the length
1880 of arbitrary constant shift instructions. */
1883 shift_insns_rtx (rtx insn)
1885 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1886 int shift_count = INTVAL (XEXP (set_src, 1));
1887 enum rtx_code shift_code = GET_CODE (set_src);
1892 return ashiftrt_insns[shift_count];
1895 return shift_insns[shift_count];
1901 /* Return the cost of a shift. */
1911 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1913 if (GET_MODE (x) == DImode
1914 && GET_CODE (XEXP (x, 1)) == CONST_INT
1915 && INTVAL (XEXP (x, 1)) == 1)
1918 /* Everything else is invalid, because there is no pattern for it. */
1921 /* If shift by a non constant, then this will be expensive. */
1922 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1923 return SH_DYNAMIC_SHIFT_COST;
1925 value = INTVAL (XEXP (x, 1));
1927 /* Otherwise, return the true cost in instructions. */
1928 if (GET_CODE (x) == ASHIFTRT)
1930 int cost = ashiftrt_insns[value];
1931 /* If SH3, then we put the constant in a reg and use shad. */
1932 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1933 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1937 return shift_insns[value];
1940 /* Return the cost of an AND operation. */
1947 /* Anding with a register is a single cycle and instruction. */
1948 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1951 i = INTVAL (XEXP (x, 1));
1955 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1956 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1957 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1960 return 1 + rtx_cost (XEXP (x, 1), AND);
1963 /* These constants are single cycle extu.[bw] instructions. */
1964 if (i == 0xff || i == 0xffff)
1966 /* Constants that can be used in an and immediate instruction in a single
1967 cycle, but this requires r0, so make it a little more expensive. */
1968 if (CONST_OK_FOR_K08 (i))
1970 /* Constants that can be loaded with a mov immediate and an and.
1971 This case is probably unnecessary. */
1972 if (CONST_OK_FOR_I08 (i))
1974 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1975 This case is probably unnecessary. */
1979 /* Return the cost of an addition or a subtraction. */
1984 /* Adding a register is a single cycle insn. */
1985 if (GET_CODE (XEXP (x, 1)) == REG
1986 || GET_CODE (XEXP (x, 1)) == SUBREG)
1989 /* Likewise for small constants. */
1990 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1991 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1995 switch (GET_CODE (XEXP (x, 1)))
2000 return TARGET_SHMEDIA64 ? 5 : 3;
2003 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2005 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2007 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2015 /* Any other constant requires a 2 cycle pc-relative load plus an
2020 /* Return the cost of a multiply. */
2022 multcosts (rtx x ATTRIBUTE_UNUSED)
2024 if (sh_multcost >= 0)
2027 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2028 accept constants. Ideally, we would use a cost of one or two and
2029 add the cost of the operand, but disregard the latter when inside loops
2030 and loop invariant code motion is still to follow.
2031 Using a multiply first and splitting it later if it's a loss
2032 doesn't work because of different sign / zero extension semantics
2033 of multiplies vs. shifts. */
2034 return TARGET_SMALLCODE ? 2 : 3;
2038 /* We have a mul insn, so we can never take more than the mul and the
2039 read of the mac reg, but count more because of the latency and extra
2041 if (TARGET_SMALLCODE)
2046 /* If we're aiming at small code, then just count the number of
2047 insns in a multiply call sequence. */
2048 if (TARGET_SMALLCODE)
2051 /* Otherwise count all the insns in the routine we'd be calling too. */
2055 /* Compute a (partial) cost for rtx X. Return true if the complete
2056 cost has been computed, and false if subexpressions should be
2057 scanned. In either case, *TOTAL contains the cost result. */
2060 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2067 if (INTVAL (x) == 0)
2069 else if (outer_code == AND && and_operand ((x), DImode))
2071 else if ((outer_code == IOR || outer_code == XOR
2072 || outer_code == PLUS)
2073 && CONST_OK_FOR_I10 (INTVAL (x)))
2075 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2076 *total = COSTS_N_INSNS (outer_code != SET);
2077 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2078 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2079 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2080 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2082 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2085 if (CONST_OK_FOR_I08 (INTVAL (x)))
2087 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2088 && CONST_OK_FOR_K08 (INTVAL (x)))
2097 if (TARGET_SHMEDIA64)
2098 *total = COSTS_N_INSNS (4);
2099 else if (TARGET_SHMEDIA32)
2100 *total = COSTS_N_INSNS (2);
2107 *total = COSTS_N_INSNS (4);
2112 if (x == CONST0_RTX (GET_MODE (x)))
2114 else if (sh_1el_vec (x, VOIDmode))
2115 *total = outer_code != SET;
2116 if (sh_rep_vec (x, VOIDmode))
2117 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2118 + (outer_code != SET));
2119 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2124 *total = COSTS_N_INSNS (addsubcosts (x));
2128 *total = COSTS_N_INSNS (andcosts (x));
2132 *total = COSTS_N_INSNS (multcosts (x));
2138 *total = COSTS_N_INSNS (shiftcosts (x));
2145 *total = COSTS_N_INSNS (20);
2149 if (sh_1el_vec (x, VOIDmode))
2150 *total = outer_code != SET;
2151 if (sh_rep_vec (x, VOIDmode))
2152 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2153 + (outer_code != SET));
2154 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2167 /* Compute the cost of an address. For the SH, all valid addresses are
2168 the same cost. Use a slightly higher cost for reg + reg addressing,
2169 since it increases pressure on r0. */
2172 sh_address_cost (rtx X)
2174 return (GET_CODE (X) == PLUS
2175 && ! CONSTANT_P (XEXP (X, 1))
2176 && ! TARGET_SHMEDIA ? 1 : 0);
2179 /* Code to expand a shift. */
2182 gen_ashift (int type, int n, rtx reg)
2184 /* Negative values here come from the shift_amounts array. */
2197 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2201 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2203 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2206 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2211 /* Same for HImode */
2214 gen_ashift_hi (int type, int n, rtx reg)
2216 /* Negative values here come from the shift_amounts array. */
2230 /* We don't have HImode right shift operations because using the
2231 ordinary 32 bit shift instructions for that doesn't generate proper
2232 zero/sign extension.
2233 gen_ashift_hi is only called in contexts where we know that the
2234 sign extension works out correctly. */
2237 if (GET_CODE (reg) == SUBREG)
2239 offset = SUBREG_BYTE (reg);
2240 reg = SUBREG_REG (reg);
2242 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2246 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2251 /* Output RTL to split a constant shift into its component SH constant
2252 shift instructions. */
2255 gen_shifty_op (int code, rtx *operands)
2257 int value = INTVAL (operands[2]);
2260 /* Truncate the shift count in case it is out of bounds. */
2261 value = value & 0x1f;
2265 if (code == LSHIFTRT)
2267 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2268 emit_insn (gen_movt (operands[0]));
2271 else if (code == ASHIFT)
2273 /* There is a two instruction sequence for 31 bit left shifts,
2274 but it requires r0. */
2275 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2277 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2278 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2283 else if (value == 0)
2285 /* This can happen even when optimizing, if there were subregs before
2286 reload. Don't output a nop here, as this is never optimized away;
2287 use a no-op move instead. */
2288 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2292 max = shift_insns[value];
2293 for (i = 0; i < max; i++)
2294 gen_ashift (code, shift_amounts[value][i], operands[0]);
2297 /* Same as above, but optimized for values where the topmost bits don't
2301 gen_shifty_hi_op (int code, rtx *operands)
2303 int value = INTVAL (operands[2]);
2305 void (*gen_fun) (int, int, rtx);
2307 /* This operation is used by and_shl for SImode values with a few
2308 high bits known to be cleared. */
2312 emit_insn (gen_nop ());
2316 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2319 max = ext_shift_insns[value];
2320 for (i = 0; i < max; i++)
2321 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2324 /* When shifting right, emit the shifts in reverse order, so that
2325 solitary negative values come first. */
2326 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2327 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2330 /* Output RTL for an arithmetic right shift. */
2332 /* ??? Rewrite to use super-optimizer sequences. */
2335 expand_ashiftrt (rtx *operands)
2343 if (GET_CODE (operands[2]) != CONST_INT)
2345 rtx count = copy_to_mode_reg (SImode, operands[2]);
2346 emit_insn (gen_negsi2 (count, count));
2347 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2350 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2351 > 1 + SH_DYNAMIC_SHIFT_COST)
2354 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2355 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2359 if (GET_CODE (operands[2]) != CONST_INT)
2362 value = INTVAL (operands[2]) & 31;
2366 /* If we are called from abs expansion, arrange things so that we
2367 we can use a single MT instruction that doesn't clobber the source,
2368 if LICM can hoist out the load of the constant zero. */
2369 if (currently_expanding_to_rtl)
2371 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2373 emit_insn (gen_mov_neg_si_t (operands[0]));
2376 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2379 else if (value >= 16 && value <= 19)
2381 wrk = gen_reg_rtx (SImode);
2382 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2385 gen_ashift (ASHIFTRT, 1, wrk);
2386 emit_move_insn (operands[0], wrk);
2389 /* Expand a short sequence inline, longer call a magic routine. */
2390 else if (value <= 5)
2392 wrk = gen_reg_rtx (SImode);
2393 emit_move_insn (wrk, operands[1]);
2395 gen_ashift (ASHIFTRT, 1, wrk);
2396 emit_move_insn (operands[0], wrk);
2400 wrk = gen_reg_rtx (Pmode);
2402 /* Load the value into an arg reg and call a helper. */
2403 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2404 sprintf (func, "__ashiftrt_r4_%d", value);
2405 function_symbol (wrk, func, SFUNC_STATIC);
2406 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2407 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2412 sh_dynamicalize_shift_p (rtx count)
2414 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2417 /* Try to find a good way to implement the combiner pattern
2418 [(set (match_operand:SI 0 "register_operand" "r")
2419 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2420 (match_operand:SI 2 "const_int_operand" "n"))
2421 (match_operand:SI 3 "const_int_operand" "n"))) .
2422 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2423 return 0 for simple right / left or left/right shift combination.
2424 return 1 for a combination of shifts with zero_extend.
2425 return 2 for a combination of shifts with an AND that needs r0.
2426 return 3 for a combination of shifts with an AND that needs an extra
2427 scratch register, when the three highmost bits of the AND mask are clear.
2428 return 4 for a combination of shifts with an AND that needs an extra
2429 scratch register, when any of the three highmost bits of the AND mask
2431 If ATTRP is set, store an initial right shift width in ATTRP[0],
2432 and the instruction length in ATTRP[1] . These values are not valid
2434 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2435 shift_amounts for the last shift value that is to be used before the
2438 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2440 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2441 int left = INTVAL (left_rtx), right;
2443 int cost, best_cost = 10000;
2444 int best_right = 0, best_len = 0;
2448 if (left < 0 || left > 31)
2450 if (GET_CODE (mask_rtx) == CONST_INT)
2451 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2453 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2454 /* Can this be expressed as a right shift / left shift pair? */
2455 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2456 right = exact_log2 (lsb);
2457 mask2 = ~(mask + lsb - 1);
2458 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2459 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2461 best_cost = shift_insns[right] + shift_insns[right + left];
2462 /* mask has no trailing zeroes <==> ! right */
2463 else if (! right && mask2 == ~(lsb2 - 1))
2465 int late_right = exact_log2 (lsb2);
2466 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2468 /* Try to use zero extend. */
2469 if (mask2 == ~(lsb2 - 1))
2473 for (width = 8; width <= 16; width += 8)
2475 /* Can we zero-extend right away? */
2476 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2479 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2480 if (cost < best_cost)
2491 /* ??? Could try to put zero extend into initial right shift,
2492 or even shift a bit left before the right shift. */
2493 /* Determine value of first part of left shift, to get to the
2494 zero extend cut-off point. */
2495 first = width - exact_log2 (lsb2) + right;
2496 if (first >= 0 && right + left - first >= 0)
2498 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2499 + ext_shift_insns[right + left - first];
2500 if (cost < best_cost)
2512 /* Try to use r0 AND pattern */
2513 for (i = 0; i <= 2; i++)
2517 if (! CONST_OK_FOR_K08 (mask >> i))
2519 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2520 if (cost < best_cost)
2525 best_len = cost - 1;
2528 /* Try to use a scratch register to hold the AND operand. */
2529 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2530 for (i = 0; i <= 2; i++)
2534 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2535 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2536 if (cost < best_cost)
2541 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2547 attrp[0] = best_right;
2548 attrp[1] = best_len;
2553 /* This is used in length attributes of the unnamed instructions
2554 corresponding to shl_and_kind return values of 1 and 2. */
2556 shl_and_length (rtx insn)
2558 rtx set_src, left_rtx, mask_rtx;
2561 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2562 left_rtx = XEXP (XEXP (set_src, 0), 1);
2563 mask_rtx = XEXP (set_src, 1);
2564 shl_and_kind (left_rtx, mask_rtx, attributes);
2565 return attributes[1];
2568 /* This is used in length attribute of the and_shl_scratch instruction. */
2571 shl_and_scr_length (rtx insn)
2573 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2574 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2575 rtx op = XEXP (set_src, 0);
2576 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2577 op = XEXP (XEXP (op, 0), 0);
2578 return len + shift_insns[INTVAL (XEXP (op, 1))];
2581 /* Generate rtl for instructions for which shl_and_kind advised a particular
2582 method of generating them, i.e. returned zero. */
2585 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2588 unsigned HOST_WIDE_INT mask;
2589 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2590 int right, total_shift;
2591 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2593 right = attributes[0];
2594 total_shift = INTVAL (left_rtx) + right;
2595 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2602 int first = attributes[2];
2607 emit_insn ((mask << right) <= 0xff
2608 ? gen_zero_extendqisi2 (dest,
2609 gen_lowpart (QImode, source))
2610 : gen_zero_extendhisi2 (dest,
2611 gen_lowpart (HImode, source)));
2615 emit_insn (gen_movsi (dest, source));
2619 operands[2] = GEN_INT (right);
2620 gen_shifty_hi_op (LSHIFTRT, operands);
2624 operands[2] = GEN_INT (first);
2625 gen_shifty_hi_op (ASHIFT, operands);
2626 total_shift -= first;
2630 emit_insn (mask <= 0xff
2631 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2632 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2633 if (total_shift > 0)
2635 operands[2] = GEN_INT (total_shift);
2636 gen_shifty_hi_op (ASHIFT, operands);
2641 shift_gen_fun = gen_shifty_op;
2643 /* If the topmost bit that matters is set, set the topmost bits
2644 that don't matter. This way, we might be able to get a shorter
2646 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2647 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2649 /* Don't expand fine-grained when combining, because that will
2650 make the pattern fail. */
2651 if (currently_expanding_to_rtl
2652 || reload_in_progress || reload_completed)
2656 /* Cases 3 and 4 should be handled by this split
2657 only while combining */
2658 gcc_assert (kind <= 2);
2661 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2664 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2669 operands[2] = GEN_INT (total_shift);
2670 shift_gen_fun (ASHIFT, operands);
2677 if (kind != 4 && total_shift < 16)
2679 neg = -ext_shift_amounts[total_shift][1];
2681 neg -= ext_shift_amounts[total_shift][2];
2685 emit_insn (gen_and_shl_scratch (dest, source,
2688 GEN_INT (total_shift + neg),
2690 emit_insn (gen_movsi (dest, dest));
2697 /* Try to find a good way to implement the combiner pattern
2698 [(set (match_operand:SI 0 "register_operand" "=r")
2699 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2700 (match_operand:SI 2 "const_int_operand" "n")
2701 (match_operand:SI 3 "const_int_operand" "n")
2703 (clobber (reg:SI T_REG))]
2704 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2705 return 0 for simple left / right shift combination.
2706 return 1 for left shift / 8 bit sign extend / left shift.
2707 return 2 for left shift / 16 bit sign extend / left shift.
2708 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2709 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2710 return 5 for left shift / 16 bit sign extend / right shift
2711 return 6 for < 8 bit sign extend / left shift.
2712 return 7 for < 8 bit sign extend / left shift / single right shift.
2713 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2716 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2718 int left, size, insize, ext;
2719 int cost = 0, best_cost;
2722 left = INTVAL (left_rtx);
2723 size = INTVAL (size_rtx);
2724 insize = size - left;
2725 gcc_assert (insize > 0);
2726 /* Default to left / right shift. */
2728 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2731 /* 16 bit shift / sign extend / 16 bit shift */
2732 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2733 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2734 below, by alternative 3 or something even better. */
2735 if (cost < best_cost)
2741 /* Try a plain sign extend between two shifts. */
2742 for (ext = 16; ext >= insize; ext -= 8)
2746 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2747 if (cost < best_cost)
2749 kind = ext / (unsigned) 8;
2753 /* Check if we can do a sloppy shift with a final signed shift
2754 restoring the sign. */
2755 if (EXT_SHIFT_SIGNED (size - ext))
2756 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2757 /* If not, maybe it's still cheaper to do the second shift sloppy,
2758 and do a final sign extend? */
2759 else if (size <= 16)
2760 cost = ext_shift_insns[ext - insize] + 1
2761 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2764 if (cost < best_cost)
2766 kind = ext / (unsigned) 8 + 2;
2770 /* Check if we can sign extend in r0 */
2773 cost = 3 + shift_insns[left];
2774 if (cost < best_cost)
2779 /* Try the same with a final signed shift. */
2782 cost = 3 + ext_shift_insns[left + 1] + 1;
2783 if (cost < best_cost)
2792 /* Try to use a dynamic shift. */
2793 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2794 if (cost < best_cost)
2805 /* Function to be used in the length attribute of the instructions
2806 implementing this pattern. */
2809 shl_sext_length (rtx insn)
2811 rtx set_src, left_rtx, size_rtx;
2814 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2815 left_rtx = XEXP (XEXP (set_src, 0), 1);
2816 size_rtx = XEXP (set_src, 1);
2817 shl_sext_kind (left_rtx, size_rtx, &cost);
2821 /* Generate rtl for this pattern */
2824 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2827 int left, size, insize, cost;
2830 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2831 left = INTVAL (left_rtx);
2832 size = INTVAL (size_rtx);
2833 insize = size - left;
2841 int ext = kind & 1 ? 8 : 16;
2842 int shift2 = size - ext;
2844 /* Don't expand fine-grained when combining, because that will
2845 make the pattern fail. */
2846 if (! currently_expanding_to_rtl
2847 && ! reload_in_progress && ! reload_completed)
2849 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2850 emit_insn (gen_movsi (dest, source));
2854 emit_insn (gen_movsi (dest, source));
2858 operands[2] = GEN_INT (ext - insize);
2859 gen_shifty_hi_op (ASHIFT, operands);
2862 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2863 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2868 operands[2] = GEN_INT (shift2);
2869 gen_shifty_op (ASHIFT, operands);
2876 if (EXT_SHIFT_SIGNED (shift2))
2878 operands[2] = GEN_INT (shift2 + 1);
2879 gen_shifty_op (ASHIFT, operands);
2880 operands[2] = const1_rtx;
2881 gen_shifty_op (ASHIFTRT, operands);
2884 operands[2] = GEN_INT (shift2);
2885 gen_shifty_hi_op (ASHIFT, operands);
2889 operands[2] = GEN_INT (-shift2);
2890 gen_shifty_hi_op (LSHIFTRT, operands);
2892 emit_insn (size <= 8
2893 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2894 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2901 if (! currently_expanding_to_rtl
2902 && ! reload_in_progress && ! reload_completed)
2903 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2907 operands[2] = GEN_INT (16 - insize);
2908 gen_shifty_hi_op (ASHIFT, operands);
2909 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2911 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2913 gen_ashift (ASHIFTRT, 1, dest);
2918 /* Don't expand fine-grained when combining, because that will
2919 make the pattern fail. */
2920 if (! currently_expanding_to_rtl
2921 && ! reload_in_progress && ! reload_completed)
2923 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2924 emit_insn (gen_movsi (dest, source));
2927 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2928 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2929 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2931 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2932 gen_shifty_op (ASHIFT, operands);
2934 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2942 /* Prefix a symbol_ref name with "datalabel". */
2945 gen_datalabel_ref (rtx sym)
2949 if (GET_CODE (sym) == LABEL_REF)
2950 return gen_rtx_CONST (GET_MODE (sym),
2951 gen_rtx_UNSPEC (GET_MODE (sym),
2955 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2957 str = XSTR (sym, 0);
2958 /* Share all SYMBOL_REF strings with the same value - that is important
2960 str = IDENTIFIER_POINTER (get_identifier (str));
2961 XSTR (sym, 0) = str;
2967 /* The SH cannot load a large constant into a register, constants have to
2968 come from a pc relative load. The reference of a pc relative load
2969 instruction must be less than 1k in front of the instruction. This
2970 means that we often have to dump a constant inside a function, and
2971 generate code to branch around it.
2973 It is important to minimize this, since the branches will slow things
2974 down and make things bigger.
2976 Worst case code looks like:
2994 We fix this by performing a scan before scheduling, which notices which
2995 instructions need to have their operands fetched from the constant table
2996 and builds the table.
3000 scan, find an instruction which needs a pcrel move. Look forward, find the
3001 last barrier which is within MAX_COUNT bytes of the requirement.
3002 If there isn't one, make one. Process all the instructions between
3003 the find and the barrier.
3005 In the above example, we can tell that L3 is within 1k of L1, so
3006 the first move can be shrunk from the 3 insn+constant sequence into
3007 just 1 insn, and the constant moved to L3 to make:
3018 Then the second move becomes the target for the shortening process. */
3022 rtx value; /* Value in table. */
3023 rtx label; /* Label of value. */
3024 rtx wend; /* End of window. */
3025 enum machine_mode mode; /* Mode of value. */
3027 /* True if this constant is accessed as part of a post-increment
3028 sequence. Note that HImode constants are never accessed in this way. */
3029 bool part_of_sequence_p;
3032 /* The maximum number of constants that can fit into one pool, since
3033 constants in the range 0..510 are at least 2 bytes long, and in the
3034 range from there to 1018 at least 4 bytes. */
3036 #define MAX_POOL_SIZE 372
3037 static pool_node pool_vector[MAX_POOL_SIZE];
3038 static int pool_size;
3039 static rtx pool_window_label;
3040 static int pool_window_last;
3042 /* ??? If we need a constant in HImode which is the truncated value of a
3043 constant we need in SImode, we could combine the two entries thus saving
3044 two bytes. Is this common enough to be worth the effort of implementing
3047 /* ??? This stuff should be done at the same time that we shorten branches.
3048 As it is now, we must assume that all branches are the maximum size, and
3049 this causes us to almost always output constant pools sooner than
3052 /* Add a constant to the pool and return its label. */
3055 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3058 rtx lab, new, ref, newref;
3060 /* First see if we've already got it. */
3061 for (i = 0; i < pool_size; i++)
3063 if (x->code == pool_vector[i].value->code
3064 && mode == pool_vector[i].mode)
3066 if (x->code == CODE_LABEL)
3068 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3071 if (rtx_equal_p (x, pool_vector[i].value))
3076 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3078 new = gen_label_rtx ();
3079 LABEL_REFS (new) = pool_vector[i].label;
3080 pool_vector[i].label = lab = new;
3082 if (lab && pool_window_label)
3084 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3085 ref = pool_vector[pool_window_last].wend;
3086 LABEL_NEXTREF (newref) = ref;
3087 pool_vector[pool_window_last].wend = newref;
3090 pool_window_label = new;
3091 pool_window_last = i;
3097 /* Need a new one. */
3098 pool_vector[pool_size].value = x;
3099 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3102 pool_vector[pool_size - 1].part_of_sequence_p = true;
3105 lab = gen_label_rtx ();
3106 pool_vector[pool_size].mode = mode;
3107 pool_vector[pool_size].label = lab;
3108 pool_vector[pool_size].wend = NULL_RTX;
3109 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3110 if (lab && pool_window_label)
3112 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3113 ref = pool_vector[pool_window_last].wend;
3114 LABEL_NEXTREF (newref) = ref;
3115 pool_vector[pool_window_last].wend = newref;
3118 pool_window_label = lab;
3119 pool_window_last = pool_size;
3124 /* Output the literal table. START, if nonzero, is the first instruction
3125 this table is needed for, and also indicates that there is at least one
3126 casesi_worker_2 instruction; We have to emit the operand3 labels from
3127 these insns at a 4-byte aligned position. BARRIER is the barrier
3128 after which we are to place the table. */
3131 dump_table (rtx start, rtx barrier)
3139 /* Do two passes, first time dump out the HI sized constants. */
3141 for (i = 0; i < pool_size; i++)
3143 pool_node *p = &pool_vector[i];
3145 if (p->mode == HImode)
3149 scan = emit_insn_after (gen_align_2 (), scan);
3152 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3153 scan = emit_label_after (lab, scan);
3154 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3156 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3158 lab = XEXP (ref, 0);
3159 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3162 else if (p->mode == DFmode)
3170 scan = emit_insn_after (gen_align_4 (), scan);
3172 for (; start != barrier; start = NEXT_INSN (start))
3173 if (GET_CODE (start) == INSN
3174 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3176 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3177 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3179 scan = emit_label_after (lab, scan);
3182 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3184 rtx align_insn = NULL_RTX;
3186 scan = emit_label_after (gen_label_rtx (), scan);
3187 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3190 for (i = 0; i < pool_size; i++)
3192 pool_node *p = &pool_vector[i];
3200 if (align_insn && !p->part_of_sequence_p)
3202 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3203 emit_label_before (lab, align_insn);
3204 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3206 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3208 lab = XEXP (ref, 0);
3209 emit_insn_before (gen_consttable_window_end (lab),
3212 delete_insn (align_insn);
3213 align_insn = NULL_RTX;
3218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3219 scan = emit_label_after (lab, scan);
3220 scan = emit_insn_after (gen_consttable_4 (p->value,
3222 need_align = ! need_align;
3228 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3233 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3234 scan = emit_label_after (lab, scan);
3235 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3242 if (p->mode != HImode)
3244 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3246 lab = XEXP (ref, 0);
3247 scan = emit_insn_after (gen_consttable_window_end (lab),
3256 for (i = 0; i < pool_size; i++)
3258 pool_node *p = &pool_vector[i];
3269 scan = emit_label_after (gen_label_rtx (), scan);
3270 scan = emit_insn_after (gen_align_4 (), scan);
3272 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3273 scan = emit_label_after (lab, scan);
3274 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3282 scan = emit_label_after (gen_label_rtx (), scan);
3283 scan = emit_insn_after (gen_align_4 (), scan);
3285 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3286 scan = emit_label_after (lab, scan);
3287 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3294 if (p->mode != HImode)
3296 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3298 lab = XEXP (ref, 0);
3299 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3304 scan = emit_insn_after (gen_consttable_end (), scan);
3305 scan = emit_barrier_after (scan);
3307 pool_window_label = NULL_RTX;
3308 pool_window_last = 0;
3311 /* Return nonzero if constant would be an ok source for a
3312 mov.w instead of a mov.l. */
3317 return (GET_CODE (src) == CONST_INT
3318 && INTVAL (src) >= -32768
3319 && INTVAL (src) <= 32767);
3322 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3324 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3325 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3326 need to fix it if the input value is CONST_OK_FOR_I08. */
3329 broken_move (rtx insn)
3331 if (GET_CODE (insn) == INSN)
3333 rtx pat = PATTERN (insn);
3334 if (GET_CODE (pat) == PARALLEL)
3335 pat = XVECEXP (pat, 0, 0);
3336 if (GET_CODE (pat) == SET
3337 /* We can load any 8 bit value if we don't care what the high
3338 order bits end up as. */
3339 && GET_MODE (SET_DEST (pat)) != QImode
3340 && (CONSTANT_P (SET_SRC (pat))
3341 /* Match mova_const. */
3342 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3343 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3344 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3346 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3347 && (fp_zero_operand (SET_SRC (pat))
3348 || fp_one_operand (SET_SRC (pat)))
3349 /* ??? If this is a -m4 or -m4-single compilation, in general
3350 we don't know the current setting of fpscr, so disable fldi.
3351 There is an exception if this was a register-register move
3352 before reload - and hence it was ascertained that we have
3353 single precision setting - and in a post-reload optimization
3354 we changed this to do a constant load. In that case
3355 we don't have an r0 clobber, hence we must use fldi. */
3356 && (! TARGET_SH4 || TARGET_FMOVD
3357 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3359 && GET_CODE (SET_DEST (pat)) == REG
3360 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3362 && GET_MODE (SET_DEST (pat)) == SImode
3363 && GET_CODE (SET_SRC (pat)) == CONST_INT
3364 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3365 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3366 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3376 return (GET_CODE (insn) == INSN
3377 && GET_CODE (PATTERN (insn)) == SET
3378 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3379 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3380 /* Don't match mova_const. */
3381 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3384 /* Fix up a mova from a switch that went out of range. */
3386 fixup_mova (rtx mova)
3390 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3391 INSN_CODE (mova) = -1;
3396 rtx lab = gen_label_rtx ();
3397 rtx wpat, wpat0, wpat1, wsrc, diff;
3401 worker = NEXT_INSN (worker);
3403 && GET_CODE (worker) != CODE_LABEL
3404 && GET_CODE (worker) != JUMP_INSN);
3405 } while (GET_CODE (worker) == NOTE
3406 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3407 wpat = PATTERN (worker);
3408 wpat0 = XVECEXP (wpat, 0, 0);
3409 wpat1 = XVECEXP (wpat, 0, 1);
3410 wsrc = SET_SRC (wpat0);
3411 PATTERN (worker) = (gen_casesi_worker_2
3412 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3413 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3415 INSN_CODE (worker) = -1;
3416 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3417 gen_rtx_LABEL_REF (Pmode, lab));
3418 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3419 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3420 INSN_CODE (mova) = -1;
3424 /* Find the last barrier from insn FROM which is close enough to hold the
3425 constant pool. If we can't find one, then create one near the end of
3429 find_barrier (int num_mova, rtx mova, rtx from)
3438 int leading_mova = num_mova;
3439 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3443 /* For HImode: range is 510, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 for the jump instruction
3445 that we may need to emit before the table, subtract 2 for the instruction
3446 that fills the jump delay slot (in very rare cases, reorg will take an
3447 instruction from after the constant pool or will leave the delay slot
3448 empty). This gives 510.
3449 For SImode: range is 1020, add 4 because pc counts from address of
3450 second instruction after this one, subtract 2 in case pc is 2 byte
3451 aligned, subtract 2 for the jump instruction that we may need to emit
3452 before the table, subtract 2 for the instruction that fills the jump
3453 delay slot. This gives 1018. */
3455 /* The branch will always be shortened now that the reference address for
3456 forward branches is the successor address, thus we need no longer make
3457 adjustments to the [sh]i_limit for -O0. */
3462 while (from && count_si < si_limit && count_hi < hi_limit)
3464 int inc = get_attr_length (from);
3467 if (GET_CODE (from) == CODE_LABEL)
3470 new_align = 1 << label_to_alignment (from);
3471 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3472 new_align = 1 << barrier_align (from);
3478 if (GET_CODE (from) == BARRIER)
3481 found_barrier = from;
3483 /* If we are at the end of the function, or in front of an alignment
3484 instruction, we need not insert an extra alignment. We prefer
3485 this kind of barrier. */
3486 if (barrier_align (from) > 2)
3487 good_barrier = from;
3490 if (broken_move (from))
3493 enum machine_mode mode;
3495 pat = PATTERN (from);
3496 if (GET_CODE (pat) == PARALLEL)
3497 pat = XVECEXP (pat, 0, 0);
3498 src = SET_SRC (pat);
3499 dst = SET_DEST (pat);
3500 mode = GET_MODE (dst);
3502 /* We must explicitly check the mode, because sometimes the
3503 front end will generate code to load unsigned constants into
3504 HImode targets without properly sign extending them. */
3506 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3509 /* We put the short constants before the long constants, so
3510 we must count the length of short constants in the range
3511 for the long constants. */
3512 /* ??? This isn't optimal, but is easy to do. */
3517 /* We dump DF/DI constants before SF/SI ones, because
3518 the limit is the same, but the alignment requirements
3519 are higher. We may waste up to 4 additional bytes
3520 for alignment, and the DF/DI constant may have
3521 another SF/SI constant placed before it. */
3522 if (TARGET_SHCOMPACT
3524 && (mode == DFmode || mode == DImode))
3529 while (si_align > 2 && found_si + si_align - 2 > count_si)
3531 if (found_si > count_si)
3532 count_si = found_si;
3533 found_si += GET_MODE_SIZE (mode);
3535 si_limit -= GET_MODE_SIZE (mode);
3545 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3547 if (found_si > count_si)
3548 count_si = found_si;
3550 else if (GET_CODE (from) == JUMP_INSN
3551 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3552 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3556 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3558 /* We have just passed the barrier in front of the
3559 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3560 the ADDR_DIFF_VEC is accessed as data, just like our pool
3561 constants, this is a good opportunity to accommodate what
3562 we have gathered so far.
3563 If we waited any longer, we could end up at a barrier in
3564 front of code, which gives worse cache usage for separated
3565 instruction / data caches. */
3566 good_barrier = found_barrier;
3571 rtx body = PATTERN (from);
3572 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3575 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3576 else if (GET_CODE (from) == JUMP_INSN
3578 && ! TARGET_SMALLCODE)
3584 if (new_align > si_align)
3586 si_limit -= (count_si - 1) & (new_align - si_align);
3587 si_align = new_align;
3589 count_si = (count_si + new_align - 1) & -new_align;
3594 if (new_align > hi_align)
3596 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3597 hi_align = new_align;
3599 count_hi = (count_hi + new_align - 1) & -new_align;
3601 from = NEXT_INSN (from);
3608 /* Try as we might, the leading mova is out of range. Change
3609 it into a load (which will become a pcload) and retry. */
3611 return find_barrier (0, 0, mova);
3615 /* Insert the constant pool table before the mova instruction,
3616 to prevent the mova label reference from going out of range. */
3618 good_barrier = found_barrier = barrier_before_mova;
3624 if (good_barrier && next_real_insn (found_barrier))
3625 found_barrier = good_barrier;
3629 /* We didn't find a barrier in time to dump our stuff,
3630 so we'll make one. */
3631 rtx label = gen_label_rtx ();
3633 /* If we exceeded the range, then we must back up over the last
3634 instruction we looked at. Otherwise, we just need to undo the
3635 NEXT_INSN at the end of the loop. */
3636 if (count_hi > hi_limit || count_si > si_limit)
3637 from = PREV_INSN (PREV_INSN (from));
3639 from = PREV_INSN (from);
3641 /* Walk back to be just before any jump or label.
3642 Putting it before a label reduces the number of times the branch
3643 around the constant pool table will be hit. Putting it before
3644 a jump makes it more likely that the bra delay slot will be
3646 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3647 || GET_CODE (from) == CODE_LABEL)
3648 from = PREV_INSN (from);
3650 from = emit_jump_insn_after (gen_jump (label), from);
3651 JUMP_LABEL (from) = label;
3652 LABEL_NUSES (label) = 1;
3653 found_barrier = emit_barrier_after (from);
3654 emit_label_after (label, found_barrier);
3657 return found_barrier;
3660 /* If the instruction INSN is implemented by a special function, and we can
3661 positively find the register that is used to call the sfunc, and this
3662 register is not used anywhere else in this instruction - except as the
3663 destination of a set, return this register; else, return 0. */
3665 sfunc_uses_reg (rtx insn)
3668 rtx pattern, part, reg_part, reg;
3670 if (GET_CODE (insn) != INSN)
3672 pattern = PATTERN (insn);
3673 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3676 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3678 part = XVECEXP (pattern, 0, i);
3679 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3684 reg = XEXP (reg_part, 0);
3685 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3687 part = XVECEXP (pattern, 0, i);
3688 if (part == reg_part || GET_CODE (part) == CLOBBER)
3690 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3691 && GET_CODE (SET_DEST (part)) == REG)
3692 ? SET_SRC (part) : part)))
3698 /* See if the only way in which INSN uses REG is by calling it, or by
3699 setting it while calling it. Set *SET to a SET rtx if the register
3703 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3709 reg2 = sfunc_uses_reg (insn);
3710 if (reg2 && REGNO (reg2) == REGNO (reg))
3712 pattern = single_set (insn);
3714 && GET_CODE (SET_DEST (pattern)) == REG
3715 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3719 if (GET_CODE (insn) != CALL_INSN)
3721 /* We don't use rtx_equal_p because we don't care if the mode is
3723 pattern = single_set (insn);
3725 && GET_CODE (SET_DEST (pattern)) == REG
3726 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3732 par = PATTERN (insn);
3733 if (GET_CODE (par) == PARALLEL)
3734 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3736 part = XVECEXP (par, 0, i);
3737 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3740 return reg_mentioned_p (reg, SET_SRC (pattern));
3746 pattern = PATTERN (insn);
3748 if (GET_CODE (pattern) == PARALLEL)
3752 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3753 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3755 pattern = XVECEXP (pattern, 0, 0);
3758 if (GET_CODE (pattern) == SET)
3760 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3762 /* We don't use rtx_equal_p, because we don't care if the
3763 mode is different. */
3764 if (GET_CODE (SET_DEST (pattern)) != REG
3765 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3771 pattern = SET_SRC (pattern);
3774 if (GET_CODE (pattern) != CALL
3775 || GET_CODE (XEXP (pattern, 0)) != MEM
3776 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3782 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3783 general registers. Bits 0..15 mean that the respective registers
3784 are used as inputs in the instruction. Bits 16..31 mean that the
3785 registers 0..15, respectively, are used as outputs, or are clobbered.
3786 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3788 regs_used (rtx x, int is_dest)
3796 code = GET_CODE (x);
3801 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3802 << (REGNO (x) + is_dest));
3806 rtx y = SUBREG_REG (x);
3808 if (GET_CODE (y) != REG)
3811 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3813 subreg_regno_offset (REGNO (y),
3816 GET_MODE (x)) + is_dest));
3820 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3822 /* If there was a return value, it must have been indicated with USE. */
3837 fmt = GET_RTX_FORMAT (code);
3839 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3844 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3845 used |= regs_used (XVECEXP (x, i, j), is_dest);
3847 else if (fmt[i] == 'e')
3848 used |= regs_used (XEXP (x, i), is_dest);
3853 /* Create an instruction that prevents redirection of a conditional branch
3854 to the destination of the JUMP with address ADDR.
3855 If the branch needs to be implemented as an indirect jump, try to find
3856 a scratch register for it.
3857 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3858 If any preceding insn that doesn't fit into a delay slot is good enough,
3859 pass 1. Pass 2 if a definite blocking insn is needed.
3860 -1 is used internally to avoid deep recursion.
3861 If a blocking instruction is made or recognized, return it. */
3864 gen_block_redirect (rtx jump, int addr, int need_block)
3867 rtx prev = prev_nonnote_insn (jump);
3870 /* First, check if we already have an instruction that satisfies our need. */
3871 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3873 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3875 if (GET_CODE (PATTERN (prev)) == USE
3876 || GET_CODE (PATTERN (prev)) == CLOBBER
3877 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3879 else if ((need_block &= ~1) < 0)
3881 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3884 if (GET_CODE (PATTERN (jump)) == RETURN)
3888 /* Reorg even does nasty things with return insns that cause branches
3889 to go out of range - see find_end_label and callers. */
3890 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3892 /* We can't use JUMP_LABEL here because it might be undefined
3893 when not optimizing. */
3894 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3895 /* If the branch is out of range, try to find a scratch register for it. */
3897 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3901 /* Don't look for the stack pointer as a scratch register,
3902 it would cause trouble if an interrupt occurred. */
3903 unsigned try = 0x7fff, used;
3904 int jump_left = flag_expensive_optimizations + 1;
3906 /* It is likely that the most recent eligible instruction is wanted for
3907 the delay slot. Therefore, find out which registers it uses, and
3908 try to avoid using them. */
3910 for (scan = jump; (scan = PREV_INSN (scan)); )
3914 if (INSN_DELETED_P (scan))
3916 code = GET_CODE (scan);
3917 if (code == CODE_LABEL || code == JUMP_INSN)
3920 && GET_CODE (PATTERN (scan)) != USE
3921 && GET_CODE (PATTERN (scan)) != CLOBBER
3922 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3924 try &= ~regs_used (PATTERN (scan), 0);
3928 for (used = dead = 0, scan = JUMP_LABEL (jump);
3929 (scan = NEXT_INSN (scan)); )
3933 if (INSN_DELETED_P (scan))
3935 code = GET_CODE (scan);
3938 used |= regs_used (PATTERN (scan), 0);
3939 if (code == CALL_INSN)
3940 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3941 dead |= (used >> 16) & ~used;
3947 if (code == JUMP_INSN)
3949 if (jump_left-- && simplejump_p (scan))
3950 scan = JUMP_LABEL (scan);
3956 /* Mask out the stack pointer again, in case it was
3957 the only 'free' register we have found. */
3960 /* If the immediate destination is still in range, check for possible
3961 threading with a jump beyond the delay slot insn.
3962 Don't check if we are called recursively; the jump has been or will be
3963 checked in a different invocation then. */
3965 else if (optimize && need_block >= 0)
3967 rtx next = next_active_insn (next_active_insn (dest));
3968 if (next && GET_CODE (next) == JUMP_INSN
3969 && GET_CODE (PATTERN (next)) == SET
3970 && recog_memoized (next) == CODE_FOR_jump_compact)
3972 dest = JUMP_LABEL (next);
3974 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3976 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3982 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3984 /* It would be nice if we could convert the jump into an indirect
3985 jump / far branch right now, and thus exposing all constituent
3986 instructions to further optimization. However, reorg uses
3987 simplejump_p to determine if there is an unconditional jump where
3988 it should try to schedule instructions from the target of the
3989 branch; simplejump_p fails for indirect jumps even if they have
3991 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3992 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3994 /* ??? We would like this to have the scope of the jump, but that
3995 scope will change when a delay slot insn of an inner scope is added.
3996 Hence, after delay slot scheduling, we'll have to expect
3997 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4000 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4001 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4004 else if (need_block)
4005 /* We can't use JUMP_LABEL here because it might be undefined
4006 when not optimizing. */
4007 return emit_insn_before (gen_block_branch_redirect
4008 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4013 #define CONDJUMP_MIN -252
4014 #define CONDJUMP_MAX 262
4017 /* A label (to be placed) in front of the jump
4018 that jumps to our ultimate destination. */
4020 /* Where we are going to insert it if we cannot move the jump any farther,
4021 or the jump itself if we have picked up an existing jump. */
4023 /* The ultimate destination. */
4025 struct far_branch *prev;
4026 /* If the branch has already been created, its address;
4027 else the address of its first prospective user. */
4031 static void gen_far_branch (struct far_branch *);
4032 enum mdep_reorg_phase_e mdep_reorg_phase;
4034 gen_far_branch (struct far_branch *bp)
4036 rtx insn = bp->insert_place;
4038 rtx label = gen_label_rtx ();
4041 emit_label_after (label, insn);
4044 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4045 LABEL_NUSES (bp->far_label)++;
4048 jump = emit_jump_insn_after (gen_return (), insn);
4049 /* Emit a barrier so that reorg knows that any following instructions
4050 are not reachable via a fall-through path.
4051 But don't do this when not optimizing, since we wouldn't suppress the
4052 alignment for the barrier then, and could end up with out-of-range
4053 pc-relative loads. */
4055 emit_barrier_after (jump);
4056 emit_label_after (bp->near_label, insn);
4057 JUMP_LABEL (jump) = bp->far_label;
4058 ok = invert_jump (insn, label, 1);
4061 /* If we are branching around a jump (rather than a return), prevent
4062 reorg from using an insn from the jump target as the delay slot insn -
4063 when reorg did this, it pessimized code (we rather hide the delay slot)
4064 and it could cause branches to go out of range. */
4067 (gen_stuff_delay_slot
4068 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4069 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4071 /* Prevent reorg from undoing our splits. */
4072 gen_block_redirect (jump, bp->address += 2, 2);
4075 /* Fix up ADDR_DIFF_VECs. */
4077 fixup_addr_diff_vecs (rtx first)
4081 for (insn = first; insn; insn = NEXT_INSN (insn))
4083 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4085 if (GET_CODE (insn) != JUMP_INSN
4086 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4088 pat = PATTERN (insn);
4089 vec_lab = XEXP (XEXP (pat, 0), 0);
4091 /* Search the matching casesi_jump_2. */
4092 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4094 if (GET_CODE (prev) != JUMP_INSN)
4096 prevpat = PATTERN (prev);
4097 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4099 x = XVECEXP (prevpat, 0, 1);
4100 if (GET_CODE (x) != USE)
4103 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4106 /* FIXME: This is a bug in the optimizer, but it seems harmless
4107 to just avoid panicing. */
4111 /* Emit the reference label of the braf where it belongs, right after
4112 the casesi_jump_2 (i.e. braf). */
4113 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4114 emit_label_after (braf_label, prev);
4116 /* Fix up the ADDR_DIF_VEC to be relative
4117 to the reference address of the braf. */
4118 XEXP (XEXP (pat, 0), 0) = braf_label;
4122 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4123 a barrier. Return the base 2 logarithm of the desired alignment. */
4125 barrier_align (rtx barrier_or_label)
4127 rtx next = next_real_insn (barrier_or_label), pat, prev;
4128 int slot, credit, jump_to_next = 0;
4133 pat = PATTERN (next);
4135 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4138 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4139 /* This is a barrier in front of a constant table. */
4142 prev = prev_real_insn (barrier_or_label);
4143 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4145 pat = PATTERN (prev);
4146 /* If this is a very small table, we want to keep the alignment after
4147 the table to the minimum for proper code alignment. */
4148 return ((TARGET_SMALLCODE
4149 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4150 <= (unsigned) 1 << (CACHE_LOG - 2)))
4151 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4154 if (TARGET_SMALLCODE)
4157 if (! TARGET_SH2 || ! optimize)
4158 return align_jumps_log;
4160 /* When fixing up pcloads, a constant table might be inserted just before
4161 the basic block that ends with the barrier. Thus, we can't trust the
4162 instruction lengths before that. */
4163 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4165 /* Check if there is an immediately preceding branch to the insn beyond
4166 the barrier. We must weight the cost of discarding useful information
4167 from the current cache line when executing this branch and there is
4168 an alignment, against that of fetching unneeded insn in front of the
4169 branch target when there is no alignment. */
4171 /* There are two delay_slot cases to consider. One is the simple case
4172 where the preceding branch is to the insn beyond the barrier (simple
4173 delay slot filling), and the other is where the preceding branch has
4174 a delay slot that is a duplicate of the insn after the barrier
4175 (fill_eager_delay_slots) and the branch is to the insn after the insn
4176 after the barrier. */
4178 /* PREV is presumed to be the JUMP_INSN for the barrier under
4179 investigation. Skip to the insn before it. */
4180 prev = prev_real_insn (prev);
4182 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4183 credit >= 0 && prev && GET_CODE (prev) == INSN;
4184 prev = prev_real_insn (prev))
4187 if (GET_CODE (PATTERN (prev)) == USE
4188 || GET_CODE (PATTERN (prev)) == CLOBBER)
4190 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4192 prev = XVECEXP (PATTERN (prev), 0, 1);
4193 if (INSN_UID (prev) == INSN_UID (next))
4195 /* Delay slot was filled with insn at jump target. */
4202 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4204 credit -= get_attr_length (prev);
4207 && GET_CODE (prev) == JUMP_INSN
4208 && JUMP_LABEL (prev))
4212 || next_real_insn (JUMP_LABEL (prev)) == next
4213 /* If relax_delay_slots() decides NEXT was redundant
4214 with some previous instruction, it will have
4215 redirected PREV's jump to the following insn. */
4216 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4217 /* There is no upper bound on redundant instructions
4218 that might have been skipped, but we must not put an
4219 alignment where none had been before. */
4220 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4222 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4223 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4224 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4226 rtx pat = PATTERN (prev);
4227 if (GET_CODE (pat) == PARALLEL)
4228 pat = XVECEXP (pat, 0, 0);
4229 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4235 return align_jumps_log;
4238 /* If we are inside a phony loop, almost any kind of label can turn up as the
4239 first one in the loop. Aligning a braf label causes incorrect switch
4240 destination addresses; we can detect braf labels because they are
4241 followed by a BARRIER.
4242 Applying loop alignment to small constant or switch tables is a waste
4243 of space, so we suppress this too. */
4245 sh_loop_align (rtx label)
4250 next = next_nonnote_insn (next);
4251 while (next && GET_CODE (next) == CODE_LABEL);
4255 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4256 || recog_memoized (next) == CODE_FOR_consttable_2)
4259 return align_loops_log;
4262 /* Do a final pass over the function, just before delayed branch
4268 rtx first, insn, mova = NULL_RTX;
4270 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4271 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4273 first = get_insns ();
4275 /* We must split call insns before introducing `mova's. If we're
4276 optimizing, they'll have already been split. Otherwise, make
4277 sure we don't split them too late. */
4279 split_all_insns_noflow ();
4284 /* If relaxing, generate pseudo-ops to associate function calls with
4285 the symbols they call. It does no harm to not generate these
4286 pseudo-ops. However, when we can generate them, it enables to
4287 linker to potentially relax the jsr to a bsr, and eliminate the
4288 register load and, possibly, the constant pool entry. */
4290 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4293 /* Remove all REG_LABEL notes. We want to use them for our own
4294 purposes. This works because none of the remaining passes
4295 need to look at them.
4297 ??? But it may break in the future. We should use a machine
4298 dependent REG_NOTE, or some other approach entirely. */
4299 for (insn = first; insn; insn = NEXT_INSN (insn))
4305 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4306 remove_note (insn, note);
4310 for (insn = first; insn; insn = NEXT_INSN (insn))
4312 rtx pattern, reg, link, set, scan, dies, label;
4313 int rescan = 0, foundinsn = 0;
4315 if (GET_CODE (insn) == CALL_INSN)
4317 pattern = PATTERN (insn);
4319 if (GET_CODE (pattern) == PARALLEL)
4320 pattern = XVECEXP (pattern, 0, 0);
4321 if (GET_CODE (pattern) == SET)
4322 pattern = SET_SRC (pattern);
4324 if (GET_CODE (pattern) != CALL
4325 || GET_CODE (XEXP (pattern, 0)) != MEM)
4328 reg = XEXP (XEXP (pattern, 0), 0);
4332 reg = sfunc_uses_reg (insn);
4337 if (GET_CODE (reg) != REG)
4340 /* This is a function call via REG. If the only uses of REG
4341 between the time that it is set and the time that it dies
4342 are in function calls, then we can associate all the
4343 function calls with the setting of REG. */
4345 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4347 if (REG_NOTE_KIND (link) != 0)
4349 set = single_set (XEXP (link, 0));
4350 if (set && rtx_equal_p (reg, SET_DEST (set)))
4352 link = XEXP (link, 0);
4359 /* ??? Sometimes global register allocation will have
4360 deleted the insn pointed to by LOG_LINKS. Try
4361 scanning backward to find where the register is set. */
4362 for (scan = PREV_INSN (insn);
4363 scan && GET_CODE (scan) != CODE_LABEL;
4364 scan = PREV_INSN (scan))
4366 if (! INSN_P (scan))
4369 if (! reg_mentioned_p (reg, scan))
4372 if (noncall_uses_reg (reg, scan, &set))
4386 /* The register is set at LINK. */
4388 /* We can only optimize the function call if the register is
4389 being set to a symbol. In theory, we could sometimes
4390 optimize calls to a constant location, but the assembler
4391 and linker do not support that at present. */
4392 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4393 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4396 /* Scan forward from LINK to the place where REG dies, and
4397 make sure that the only insns which use REG are
4398 themselves function calls. */
4400 /* ??? This doesn't work for call targets that were allocated
4401 by reload, since there may not be a REG_DEAD note for the
4405 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4409 /* Don't try to trace forward past a CODE_LABEL if we haven't
4410 seen INSN yet. Ordinarily, we will only find the setting insn
4411 in LOG_LINKS if it is in the same basic block. However,
4412 cross-jumping can insert code labels in between the load and
4413 the call, and can result in situations where a single call
4414 insn may have two targets depending on where we came from. */
4416 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4419 if (! INSN_P (scan))
4422 /* Don't try to trace forward past a JUMP. To optimize
4423 safely, we would have to check that all the
4424 instructions at the jump destination did not use REG. */
4426 if (GET_CODE (scan) == JUMP_INSN)
4429 if (! reg_mentioned_p (reg, scan))
4432 if (noncall_uses_reg (reg, scan, &scanset))
4439 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4441 /* There is a function call to this register other
4442 than the one we are checking. If we optimize
4443 this call, we need to rescan again below. */
4447 /* ??? We shouldn't have to worry about SCANSET here.
4448 We should just be able to check for a REG_DEAD note
4449 on a function call. However, the REG_DEAD notes are
4450 apparently not dependable around libcalls; c-torture
4451 execute/920501-2 is a test case. If SCANSET is set,
4452 then this insn sets the register, so it must have
4453 died earlier. Unfortunately, this will only handle
4454 the cases in which the register is, in fact, set in a
4457 /* ??? We shouldn't have to use FOUNDINSN here.
4458 However, the LOG_LINKS fields are apparently not
4459 entirely reliable around libcalls;
4460 newlib/libm/math/e_pow.c is a test case. Sometimes
4461 an insn will appear in LOG_LINKS even though it is
4462 not the most recent insn which sets the register. */
4466 || find_reg_note (scan, REG_DEAD, reg)))
4475 /* Either there was a branch, or some insn used REG
4476 other than as a function call address. */
4480 /* Create a code label, and put it in a REG_LABEL note on
4481 the insn which sets the register, and on each call insn
4482 which uses the register. In final_prescan_insn we look
4483 for the REG_LABEL notes, and output the appropriate label
4486 label = gen_label_rtx ();
4487 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4489 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4498 scan = NEXT_INSN (scan);
4500 && ((GET_CODE (scan) == CALL_INSN
4501 && reg_mentioned_p (reg, scan))
4502 || ((reg2 = sfunc_uses_reg (scan))
4503 && REGNO (reg2) == REGNO (reg))))
4505 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4507 while (scan != dies);
4513 fixup_addr_diff_vecs (first);
4517 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4518 shorten_branches (first);
4520 /* Scan the function looking for move instructions which have to be
4521 changed to pc-relative loads and insert the literal tables. */
4523 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4524 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4528 /* ??? basic block reordering can move a switch table dispatch
4529 below the switch table. Check if that has happened.
4530 We only have the addresses available when optimizing; but then,
4531 this check shouldn't be needed when not optimizing. */
4532 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4534 && (INSN_ADDRESSES (INSN_UID (insn))
4535 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4537 /* Change the mova into a load.
4538 broken_move will then return true for it. */
4541 else if (! num_mova++)
4544 else if (GET_CODE (insn) == JUMP_INSN
4545 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4553 /* Some code might have been inserted between the mova and
4554 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4555 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4556 total += get_attr_length (scan);
4558 /* range of mova is 1020, add 4 because pc counts from address of
4559 second instruction after this one, subtract 2 in case pc is 2
4560 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4561 cancels out with alignment effects of the mova itself. */
4564 /* Change the mova into a load, and restart scanning
4565 there. broken_move will then return true for mova. */
4570 if (broken_move (insn)
4571 || (GET_CODE (insn) == INSN
4572 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4575 /* Scan ahead looking for a barrier to stick the constant table
4577 rtx barrier = find_barrier (num_mova, mova, insn);
4578 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4579 int need_aligned_label = 0;
4581 if (num_mova && ! mova_p (mova))
4583 /* find_barrier had to change the first mova into a
4584 pcload; thus, we have to start with this new pcload. */
4588 /* Now find all the moves between the points and modify them. */
4589 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4591 if (GET_CODE (scan) == CODE_LABEL)
4593 if (GET_CODE (scan) == INSN
4594 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4595 need_aligned_label = 1;
4596 if (broken_move (scan))
4598 rtx *patp = &PATTERN (scan), pat = *patp;
4602 enum machine_mode mode;
4604 if (GET_CODE (pat) == PARALLEL)
4605 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4606 src = SET_SRC (pat);
4607 dst = SET_DEST (pat);
4608 mode = GET_MODE (dst);
4610 if (mode == SImode && hi_const (src)
4611 && REGNO (dst) != FPUL_REG)
4616 while (GET_CODE (dst) == SUBREG)
4618 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4619 GET_MODE (SUBREG_REG (dst)),
4622 dst = SUBREG_REG (dst);
4624 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4626 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4628 /* This must be an insn that clobbers r0. */
4629 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4630 XVECLEN (PATTERN (scan), 0)
4632 rtx clobber = *clobberp;
4634 gcc_assert (GET_CODE (clobber) == CLOBBER
4635 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4638 && reg_set_between_p (r0_rtx, last_float_move, scan))
4642 && GET_MODE_SIZE (mode) != 4
4643 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4645 lab = add_constant (src, mode, last_float);
4647 emit_insn_before (gen_mova (lab), scan);
4650 /* There will be a REG_UNUSED note for r0 on
4651 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4652 lest reorg:mark_target_live_regs will not
4653 consider r0 to be used, and we end up with delay
4654 slot insn in front of SCAN that clobbers r0. */
4656 = find_regno_note (last_float_move, REG_UNUSED, 0);
4658 /* If we are not optimizing, then there may not be
4661 PUT_MODE (note, REG_INC);
4663 *last_float_addr = r0_inc_rtx;
4665 last_float_move = scan;
4667 newsrc = gen_const_mem (mode,
4668 (((TARGET_SH4 && ! TARGET_FMOVD)
4669 || REGNO (dst) == FPUL_REG)
4672 last_float_addr = &XEXP (newsrc, 0);
4674 /* Remove the clobber of r0. */
4675 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4676 gen_rtx_SCRATCH (Pmode));
4678 /* This is a mova needing a label. Create it. */
4679 else if (GET_CODE (src) == UNSPEC
4680 && XINT (src, 1) == UNSPEC_MOVA
4681 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4683 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4684 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4685 newsrc = gen_rtx_UNSPEC (SImode,
4686 gen_rtvec (1, newsrc),
4691 lab = add_constant (src, mode, 0);
4692 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4693 newsrc = gen_const_mem (mode, newsrc);
4695 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4696 INSN_CODE (scan) = -1;
4699 dump_table (need_aligned_label ? insn : 0, barrier);
4704 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4705 INSN_ADDRESSES_FREE ();
4706 split_branches (first);
4708 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4709 also has an effect on the register that holds the address of the sfunc.
4710 Insert an extra dummy insn in front of each sfunc that pretends to
4711 use this register. */
4712 if (flag_delayed_branch)
4714 for (insn = first; insn; insn = NEXT_INSN (insn))
4716 rtx reg = sfunc_uses_reg (insn);
4720 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4724 /* fpscr is not actually a user variable, but we pretend it is for the
4725 sake of the previous optimization passes, since we want it handled like
4726 one. However, we don't have any debugging information for it, so turn
4727 it into a non-user variable now. */
4729 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4731 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4735 get_dest_uid (rtx label, int max_uid)
4737 rtx dest = next_real_insn (label);
4740 /* This can happen for an undefined label. */
4742 dest_uid = INSN_UID (dest);
4743 /* If this is a newly created branch redirection blocking instruction,
4744 we cannot index the branch_uid or insn_addresses arrays with its
4745 uid. But then, we won't need to, because the actual destination is
4746 the following branch. */
4747 while (dest_uid >= max_uid)
4749 dest = NEXT_INSN (dest);
4750 dest_uid = INSN_UID (dest);
4752 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4757 /* Split condbranches that are out of range. Also add clobbers for
4758 scratch registers that are needed in far jumps.
4759 We do this before delay slot scheduling, so that it can take our
4760 newly created instructions into account. It also allows us to
4761 find branches with common targets more easily. */
4764 split_branches (rtx first)
4767 struct far_branch **uid_branch, *far_branch_list = 0;
4768 int max_uid = get_max_uid ();
4771 /* Find out which branches are out of range. */
4772 shorten_branches (first);
4774 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4775 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4777 for (insn = first; insn; insn = NEXT_INSN (insn))
4778 if (! INSN_P (insn))
4780 else if (INSN_DELETED_P (insn))
4782 /* Shorten_branches would split this instruction again,
4783 so transform it into a note. */
4784 PUT_CODE (insn, NOTE);
4785 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4786 NOTE_SOURCE_FILE (insn) = 0;
4788 else if (GET_CODE (insn) == JUMP_INSN
4789 /* Don't mess with ADDR_DIFF_VEC */
4790 && (GET_CODE (PATTERN (insn)) == SET
4791 || GET_CODE (PATTERN (insn)) == RETURN))
4793 enum attr_type type = get_attr_type (insn);
4794 if (type == TYPE_CBRANCH)
4798 if (get_attr_length (insn) > 4)
4800 rtx src = SET_SRC (PATTERN (insn));
4801 rtx olabel = XEXP (XEXP (src, 1), 0);
4802 int addr = INSN_ADDRESSES (INSN_UID (insn));
4804 int dest_uid = get_dest_uid (olabel, max_uid);
4805 struct far_branch *bp = uid_branch[dest_uid];
4807 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4808 the label if the LABEL_NUSES count drops to zero. There is
4809 always a jump_optimize pass that sets these values, but it
4810 proceeds to delete unreferenced code, and then if not
4811 optimizing, to un-delete the deleted instructions, thus
4812 leaving labels with too low uses counts. */
4815 JUMP_LABEL (insn) = olabel;
4816 LABEL_NUSES (olabel)++;
4820 bp = (struct far_branch *) alloca (sizeof *bp);
4821 uid_branch[dest_uid] = bp;
4822 bp->prev = far_branch_list;
4823 far_branch_list = bp;
4825 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4826 LABEL_NUSES (bp->far_label)++;
4830 label = bp->near_label;
4831 if (! label && bp->address - addr >= CONDJUMP_MIN)
4833 rtx block = bp->insert_place;
4835 if (GET_CODE (PATTERN (block)) == RETURN)
4836 block = PREV_INSN (block);
4838 block = gen_block_redirect (block,
4840 label = emit_label_after (gen_label_rtx (),
4842 bp->near_label = label;
4844 else if (label && ! NEXT_INSN (label))
4846 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4847 bp->insert_place = insn;
4849 gen_far_branch (bp);
4853 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4855 bp->near_label = label = gen_label_rtx ();
4856 bp->insert_place = insn;
4859 ok = redirect_jump (insn, label, 1);
4864 /* get_attr_length (insn) == 2 */
4865 /* Check if we have a pattern where reorg wants to redirect
4866 the branch to a label from an unconditional branch that
4868 /* We can't use JUMP_LABEL here because it might be undefined
4869 when not optimizing. */
4870 /* A syntax error might cause beyond to be NULL_RTX. */
4872 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4876 && (GET_CODE (beyond) == JUMP_INSN
4877 || ((beyond = next_active_insn (beyond))
4878 && GET_CODE (beyond) == JUMP_INSN))
4879 && GET_CODE (PATTERN (beyond)) == SET
4880 && recog_memoized (beyond) == CODE_FOR_jump_compact
4882 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4883 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4885 gen_block_redirect (beyond,
4886 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4889 next = next_active_insn (insn);
4891 if ((GET_CODE (next) == JUMP_INSN
4892 || ((next = next_active_insn (next))
4893 && GET_CODE (next) == JUMP_INSN))
4894 && GET_CODE (PATTERN (next)) == SET
4895 && recog_memoized (next) == CODE_FOR_jump_compact
4897 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4898 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4900 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4902 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4904 int addr = INSN_ADDRESSES (INSN_UID (insn));
4907 struct far_branch *bp;
4909 if (type == TYPE_JUMP)
4911 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4912 dest_uid = get_dest_uid (far_label, max_uid);
4915 /* Parse errors can lead to labels outside
4917 if (! NEXT_INSN (far_label))
4922 JUMP_LABEL (insn) = far_label;
4923 LABEL_NUSES (far_label)++;
4925 redirect_jump (insn, NULL_RTX, 1);
4929 bp = uid_branch[dest_uid];
4932 bp = (struct far_branch *) alloca (sizeof *bp);
4933 uid_branch[dest_uid] = bp;
4934 bp->prev = far_branch_list;
4935 far_branch_list = bp;
4937 bp->far_label = far_label;
4939 LABEL_NUSES (far_label)++;
4941 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4942 if (addr - bp->address <= CONDJUMP_MAX)
4943 emit_label_after (bp->near_label, PREV_INSN (insn));
4946 gen_far_branch (bp);
4952 bp->insert_place = insn;
4954 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4956 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4959 /* Generate all pending far branches,
4960 and free our references to the far labels. */
4961 while (far_branch_list)
4963 if (far_branch_list->near_label
4964 && ! NEXT_INSN (far_branch_list->near_label))
4965 gen_far_branch (far_branch_list);
4967 && far_branch_list->far_label
4968 && ! --LABEL_NUSES (far_branch_list->far_label))
4969 delete_insn (far_branch_list->far_label);
4970 far_branch_list = far_branch_list->prev;
4973 /* Instruction length information is no longer valid due to the new
4974 instructions that have been generated. */
4975 init_insn_lengths ();
4978 /* Dump out instruction addresses, which is useful for debugging the
4979 constant pool table stuff.
4981 If relaxing, output the label and pseudo-ops used to link together
4982 calls and the instruction which set the registers. */
4984 /* ??? The addresses printed by this routine for insns are nonsense for
4985 insns which are inside of a sequence where none of the inner insns have
4986 variable length. This is because the second pass of shorten_branches
4987 does not bother to update them. */
4990 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4991 int noperands ATTRIBUTE_UNUSED)
4993 if (TARGET_DUMPISIZE)
4994 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5000 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5005 pattern = PATTERN (insn);
5006 if (GET_CODE (pattern) == PARALLEL)
5007 pattern = XVECEXP (pattern, 0, 0);
5008 switch (GET_CODE (pattern))
5011 if (GET_CODE (SET_SRC (pattern)) != CALL
5012 && get_attr_type (insn) != TYPE_SFUNC)
5014 targetm.asm_out.internal_label
5015 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5018 /* else FALLTHROUGH */
5020 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5021 CODE_LABEL_NUMBER (XEXP (note, 0)));
5031 /* Dump out any constants accumulated in the final pass. These will
5035 output_jump_label_table (void)
5041 fprintf (asm_out_file, "\t.align 2\n");
5042 for (i = 0; i < pool_size; i++)
5044 pool_node *p = &pool_vector[i];
5046 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5047 CODE_LABEL_NUMBER (p->label));
5048 output_asm_insn (".long %O0", &p->value);
5056 /* A full frame looks like:
5060 [ if current_function_anonymous_args
5073 local-0 <- fp points here. */
5075 /* Number of bytes pushed for anonymous args, used to pass information
5076 between expand_prologue and expand_epilogue. */
5078 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5079 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5080 for an epilogue and a negative value means that it's for a sibcall
5081 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5082 all the registers that are about to be restored, and hence dead. */
5085 output_stack_adjust (int size, rtx reg, int epilogue_p,
5086 HARD_REG_SET *live_regs_mask)
5088 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5091 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5093 /* This test is bogus, as output_stack_adjust is used to re-align the
5096 gcc_assert (!(size % align));
5099 if (CONST_OK_FOR_ADD (size))
5100 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5101 /* Try to do it with two partial adjustments; however, we must make
5102 sure that the stack is properly aligned at all times, in case
5103 an interrupt occurs between the two partial adjustments. */
5104 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5105 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5107 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5108 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5114 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5117 /* If TEMP is invalid, we could temporarily save a general
5118 register to MACL. However, there is currently no need
5119 to handle this case, so just die when we see it. */
5121 || current_function_interrupt
5122 || ! call_really_used_regs[temp] || fixed_regs[temp])
5124 if (temp < 0 && ! current_function_interrupt
5125 && (TARGET_SHMEDIA || epilogue_p >= 0))
5128 COPY_HARD_REG_SET (temps, call_used_reg_set);
5129 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5133 if (current_function_return_rtx)
5135 enum machine_mode mode;
5136 mode = GET_MODE (current_function_return_rtx);
5137 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5138 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5140 for (i = 0; i < nreg; i++)
5141 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5142 if (current_function_calls_eh_return)
5144 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5145 for (i = 0; i <= 3; i++)
5146 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5149 if (TARGET_SHMEDIA && epilogue_p < 0)
5150 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5151 CLEAR_HARD_REG_BIT (temps, i);
5152 if (epilogue_p <= 0)
5154 for (i = FIRST_PARM_REG;
5155 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5156 CLEAR_HARD_REG_BIT (temps, i);
5157 if (cfun->static_chain_decl != NULL)
5158 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5160 temp = scavenge_reg (&temps);
5162 if (temp < 0 && live_regs_mask)
5163 temp = scavenge_reg (live_regs_mask);
5166 rtx adj_reg, tmp_reg, mem;
5168 /* If we reached here, the most likely case is the (sibcall)
5169 epilogue for non SHmedia. Put a special push/pop sequence
5170 for such case as the last resort. This looks lengthy but
5171 would not be problem because it seems to be very
5174 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5177 /* ??? There is still the slight possibility that r4 or
5178 r5 have been reserved as fixed registers or assigned
5179 as global registers, and they change during an
5180 interrupt. There are possible ways to handle this:
5182 - If we are adjusting the frame pointer (r14), we can do
5183 with a single temp register and an ordinary push / pop
5185 - Grab any call-used or call-saved registers (i.e. not
5186 fixed or globals) for the temps we need. We might
5187 also grab r14 if we are adjusting the stack pointer.
5188 If we can't find enough available registers, issue
5189 a diagnostic and die - the user must have reserved
5190 way too many registers.
5191 But since all this is rather unlikely to happen and
5192 would require extra testing, we just die if r4 / r5
5193 are not available. */
5194 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5195 && !global_regs[4] && !global_regs[5]);
5197 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5198 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5199 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5200 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5201 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5202 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5203 emit_move_insn (mem, tmp_reg);
5204 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5205 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5206 emit_move_insn (mem, tmp_reg);
5207 emit_move_insn (reg, adj_reg);
5208 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5209 emit_move_insn (adj_reg, mem);
5210 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5211 emit_move_insn (tmp_reg, mem);
5214 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5216 /* If SIZE is negative, subtract the positive value.
5217 This sometimes allows a constant pool entry to be shared
5218 between prologue and epilogue code. */
5221 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5222 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5226 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5227 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5231 = (gen_rtx_EXPR_LIST
5232 (REG_FRAME_RELATED_EXPR,
5233 gen_rtx_SET (VOIDmode, reg,
5234 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5244 RTX_FRAME_RELATED_P (x) = 1;
5248 /* Output RTL to push register RN onto the stack. */
5255 x = gen_push_fpul ();
5256 else if (rn == FPSCR_REG)
5257 x = gen_push_fpscr ();
5258 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5259 && FP_OR_XD_REGISTER_P (rn))
5261 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5263 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5265 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5266 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5268 x = gen_push (gen_rtx_REG (SImode, rn));
5272 = gen_rtx_EXPR_LIST (REG_INC,
5273 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5277 /* Output RTL to pop register RN from the stack. */
5284 x = gen_pop_fpul ();
5285 else if (rn == FPSCR_REG)
5286 x = gen_pop_fpscr ();
5287 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5288 && FP_OR_XD_REGISTER_P (rn))
5290 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5292 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5294 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5295 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5297 x = gen_pop (gen_rtx_REG (SImode, rn));
5301 = gen_rtx_EXPR_LIST (REG_INC,
5302 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5305 /* Generate code to push the regs specified in the mask. */
5308 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5313 /* Push PR last; this gives better latencies after the prologue, and
5314 candidates for the return delay slot when there are no general
5315 registers pushed. */
5316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5318 /* If this is an interrupt handler, and the SZ bit varies,
5319 and we have to push any floating point register, we need
5320 to switch to the correct precision first. */
5321 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5322 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
5324 HARD_REG_SET unsaved;
5327 COMPL_HARD_REG_SET (unsaved, *mask);
5328 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5332 && (i != FPSCR_REG || ! skip_fpscr)
5333 && TEST_HARD_REG_BIT (*mask, i))
5336 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5340 /* Calculate how much extra space is needed to save all callee-saved
5342 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5345 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5348 int stack_space = 0;
5349 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5351 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5352 if ((! call_really_used_regs[reg] || interrupt_handler)
5353 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5354 /* Leave space to save this target register on the stack,
5355 in case target register allocation wants to use it. */
5356 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5360 /* Decide whether we should reserve space for callee-save target registers,
5361 in case target register allocation wants to use them. REGS_SAVED is
5362 the space, in bytes, that is already required for register saves.
5363 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5366 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5367 HARD_REG_SET *live_regs_mask)
5371 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5374 /* Decide how much space to reserve for callee-save target registers
5375 in case target register allocation wants to use them.
5376 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5379 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5381 if (shmedia_space_reserved_for_target_registers)
5382 return shmedia_target_regs_stack_space (live_regs_mask);
5387 /* Work out the registers which need to be saved, both as a mask and a
5388 count of saved words. Return the count.
5390 If doing a pragma interrupt function, then push all regs used by the
5391 function, and if we call another function (we can tell by looking at PR),
5392 make sure that all the regs it clobbers are safe too. */
5395 calc_live_regs (HARD_REG_SET *live_regs_mask)
5400 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5401 bool nosave_low_regs;
5402 int pr_live, has_call;
5404 attrs = DECL_ATTRIBUTES (current_function_decl);
5405 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5406 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5407 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5408 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5410 CLEAR_HARD_REG_SET (*live_regs_mask);
5411 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5412 && regs_ever_live[FPSCR_REG])
5413 target_flags &= ~MASK_FPU_SINGLE;
5414 /* If we can save a lot of saves by switching to double mode, do that. */
5415 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5416 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5417 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5418 && (! call_really_used_regs[reg]
5419 || interrupt_handler)
5422 target_flags &= ~MASK_FPU_SINGLE;
5425 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5426 knows how to use it. That means the pseudo originally allocated for
5427 the initial value can become the PR_MEDIA_REG hard register, as seen for
5428 execute/20010122-1.c:test9. */
5430 /* ??? this function is called from initial_elimination_offset, hence we
5431 can't use the result of sh_media_register_for_return here. */
5432 pr_live = sh_pr_n_sets ();
5435 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5436 pr_live = (pr_initial
5437 ? (GET_CODE (pr_initial) != REG
5438 || REGNO (pr_initial) != (PR_REG))
5439 : regs_ever_live[PR_REG]);
5440 /* For Shcompact, if not optimizing, we end up with a memory reference
5441 using the return address pointer for __builtin_return_address even
5442 though there is no actual need to put the PR register on the stack. */
5443 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5445 /* Force PR to be live if the prologue has to call the SHmedia
5446 argument decoder or register saver. */
5447 if (TARGET_SHCOMPACT
5448 && ((current_function_args_info.call_cookie
5449 & ~ CALL_COOKIE_RET_TRAMP (1))
5450 || current_function_has_nonlocal_label))
5452 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5453 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5455 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5458 ? (/* Need to save all the regs ever live. */
5459 (regs_ever_live[reg]
5460 || (call_really_used_regs[reg]
5461 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5462 || reg == PIC_OFFSET_TABLE_REGNUM)
5464 || (TARGET_SHMEDIA && has_call
5465 && REGISTER_NATURAL_MODE (reg) == SImode
5466 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5467 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5468 && reg != RETURN_ADDRESS_POINTER_REGNUM
5469 && reg != T_REG && reg != GBR_REG
5470 /* Push fpscr only on targets which have FPU */
5471 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5472 : (/* Only push those regs which are used and need to be saved. */
5475 && current_function_args_info.call_cookie
5476 && reg == PIC_OFFSET_TABLE_REGNUM)
5477 || (regs_ever_live[reg]
5478 && (!call_really_used_regs[reg]
5479 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5480 || (current_function_calls_eh_return
5481 && (reg == EH_RETURN_DATA_REGNO (0)
5482 || reg == EH_RETURN_DATA_REGNO (1)
5483 || reg == EH_RETURN_DATA_REGNO (2)
5484 || reg == EH_RETURN_DATA_REGNO (3)))
5485 || ((reg == MACL_REG || reg == MACH_REG)
5486 && regs_ever_live[reg]
5487 && sh_cfun_attr_renesas_p ())
5490 SET_HARD_REG_BIT (*live_regs_mask, reg);
5491 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5493 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5494 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5496 if (FP_REGISTER_P (reg))
5498 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5500 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5501 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5504 else if (XD_REGISTER_P (reg))
5506 /* Must switch to double mode to access these registers. */
5507 target_flags &= ~MASK_FPU_SINGLE;
5511 if (nosave_low_regs && reg == R8_REG)
5514 /* If we have a target register optimization pass after prologue / epilogue
5515 threading, we need to assume all target registers will be live even if
5517 if (flag_branch_target_load_optimize2
5518 && TARGET_SAVE_ALL_TARGET_REGS
5519 && shmedia_space_reserved_for_target_registers)
5520 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5521 if ((! call_really_used_regs[reg] || interrupt_handler)
5522 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5524 SET_HARD_REG_BIT (*live_regs_mask, reg);
5525 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5527 /* If this is an interrupt handler, we don't have any call-clobbered
5528 registers we can conveniently use for target register save/restore.
5529 Make sure we save at least one general purpose register when we need
5530 to save target registers. */
5531 if (interrupt_handler
5532 && hard_regs_intersect_p (live_regs_mask,
5533 ®_class_contents[TARGET_REGS])
5534 && ! hard_regs_intersect_p (live_regs_mask,
5535 ®_class_contents[GENERAL_REGS]))
5537 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5538 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5544 /* Code to generate prologue and epilogue sequences */
5546 /* PUSHED is the number of bytes that are being pushed on the
5547 stack for register saves. Return the frame size, padded
5548 appropriately so that the stack stays properly aligned. */
5549 static HOST_WIDE_INT
5550 rounded_frame_size (int pushed)
5552 HOST_WIDE_INT size = get_frame_size ();
5553 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5555 return ((size + pushed + align - 1) & -align) - pushed;
5558 /* Choose a call-clobbered target-branch register that remains
5559 unchanged along the whole function. We set it up as the return
5560 value in the prologue. */
5562 sh_media_register_for_return (void)
5567 if (! current_function_is_leaf)
5569 if (lookup_attribute ("interrupt_handler",
5570 DECL_ATTRIBUTES (current_function_decl)))
5572 if (sh_cfun_interrupt_handler_p ())
5575 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5577 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5578 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5584 /* The maximum registers we need to save are:
5585 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5586 - 32 floating point registers (for each pair, we save none,
5587 one single precision value, or a double precision value).
5588 - 8 target registers
5589 - add 1 entry for a delimiter. */
5590 #define MAX_SAVED_REGS (62+32+8)
5592 typedef struct save_entry_s
5601 /* There will be a delimiter entry with VOIDmode both at the start and the
5602 end of a filled in schedule. The end delimiter has the offset of the
5603 save with the smallest (i.e. most negative) offset. */
5604 typedef struct save_schedule_s
5606 save_entry entries[MAX_SAVED_REGS + 2];
5607 int temps[MAX_TEMPS+1];
5610 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5611 use reverse order. Returns the last entry written to (not counting
5612 the delimiter). OFFSET_BASE is a number to be added to all offset
5616 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5620 save_entry *entry = schedule->entries;
5624 if (! current_function_interrupt)
5625 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5626 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5627 && ! FUNCTION_ARG_REGNO_P (i)
5628 && i != FIRST_RET_REG
5629 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5630 && ! (current_function_calls_eh_return
5631 && (i == EH_RETURN_STACKADJ_REGNO
5632 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5633 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5634 schedule->temps[tmpx++] = i;
5636 entry->mode = VOIDmode;
5637 entry->offset = offset_base;
5639 /* We loop twice: first, we save 8-byte aligned registers in the
5640 higher addresses, that are known to be aligned. Then, we
5641 proceed to saving 32-bit registers that don't need 8-byte
5643 If this is an interrupt function, all registers that need saving
5644 need to be saved in full. moreover, we need to postpone saving
5645 target registers till we have saved some general purpose registers
5646 we can then use as scratch registers. */
5647 offset = offset_base;
5648 for (align = 1; align >= 0; align--)
5650 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5651 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5653 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5656 if (current_function_interrupt)
5658 if (TARGET_REGISTER_P (i))
5660 if (GENERAL_REGISTER_P (i))
5663 if (mode == SFmode && (i % 2) == 1
5664 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5665 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5672 /* If we're doing the aligned pass and this is not aligned,
5673 or we're doing the unaligned pass and this is aligned,
5675 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5679 if (current_function_interrupt
5680 && GENERAL_REGISTER_P (i)
5681 && tmpx < MAX_TEMPS)
5682 schedule->temps[tmpx++] = i;
5684 offset -= GET_MODE_SIZE (mode);
5687 entry->offset = offset;
5690 if (align && current_function_interrupt)
5691 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5692 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5694 offset -= GET_MODE_SIZE (DImode);
5696 entry->mode = DImode;
5697 entry->offset = offset;
5702 entry->mode = VOIDmode;
5703 entry->offset = offset;
5704 schedule->temps[tmpx] = -1;
5709 sh_expand_prologue (void)
5711 HARD_REG_SET live_regs_mask;
5714 int save_flags = target_flags;
5717 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5719 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5721 /* We have pretend args if we had an object sent partially in registers
5722 and partially on the stack, e.g. a large structure. */
5723 pretend_args = current_function_pretend_args_size;
5724 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5725 && (NPARM_REGS(SImode)
5726 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5728 output_stack_adjust (-pretend_args
5729 - current_function_args_info.stack_regs * 8,
5730 stack_pointer_rtx, 0, NULL);
5732 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5733 /* We're going to use the PIC register to load the address of the
5734 incoming-argument decoder and/or of the return trampoline from
5735 the GOT, so make sure the PIC register is preserved and
5737 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5739 if (TARGET_SHCOMPACT
5740 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5744 /* First, make all registers with incoming arguments that will
5745 be pushed onto the stack live, so that register renaming
5746 doesn't overwrite them. */
5747 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5748 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5749 >= NPARM_REGS (SImode) - reg)
5750 for (; reg < NPARM_REGS (SImode); reg++)
5751 emit_insn (gen_shcompact_preserve_incoming_args
5752 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5753 else if (CALL_COOKIE_INT_REG_GET
5754 (current_function_args_info.call_cookie, reg) == 1)
5755 emit_insn (gen_shcompact_preserve_incoming_args
5756 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5758 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5760 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5761 GEN_INT (current_function_args_info.call_cookie));
5762 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5763 gen_rtx_REG (SImode, R0_REG));
5765 else if (TARGET_SHMEDIA)
5767 int tr = sh_media_register_for_return ();
5771 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5772 gen_rtx_REG (DImode, PR_MEDIA_REG));
5774 /* ??? We should suppress saving pr when we don't need it, but this
5775 is tricky because of builtin_return_address. */
5777 /* If this function only exits with sibcalls, this copy
5778 will be flagged as dead. */
5779 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5785 /* Emit the code for SETUP_VARARGS. */
5786 if (current_function_stdarg)
5788 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5790 /* Push arg regs as if they'd been provided by caller in stack. */
5791 for (i = 0; i < NPARM_REGS(SImode); i++)
5793 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5796 if (i >= (NPARM_REGS(SImode)
5797 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5801 RTX_FRAME_RELATED_P (insn) = 0;
5806 /* If we're supposed to switch stacks at function entry, do so now. */
5809 /* The argument specifies a variable holding the address of the
5810 stack the interrupt function should switch to/from at entry/exit. */
5812 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5813 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5815 emit_insn (gen_sp_switch_1 (sp_switch));
5818 d = calc_live_regs (&live_regs_mask);
5819 /* ??? Maybe we could save some switching if we can move a mode switch
5820 that already happens to be at the function start into the prologue. */
5821 if (target_flags != save_flags && ! current_function_interrupt)
5822 emit_insn (gen_toggle_sz ());
5826 int offset_base, offset;
5828 int offset_in_r0 = -1;
5830 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5831 int total_size, save_size;
5832 save_schedule schedule;
5836 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5837 && ! current_function_interrupt)
5838 r0 = gen_rtx_REG (Pmode, R0_REG);
5840 /* D is the actual number of bytes that we need for saving registers,
5841 however, in initial_elimination_offset we have committed to using
5842 an additional TREGS_SPACE amount of bytes - in order to keep both
5843 addresses to arguments supplied by the caller and local variables
5844 valid, we must keep this gap. Place it between the incoming
5845 arguments and the actually saved registers in a bid to optimize
5846 locality of reference. */
5847 total_size = d + tregs_space;
5848 total_size += rounded_frame_size (total_size);
5849 save_size = total_size - rounded_frame_size (d);
5850 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5851 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5852 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5854 /* If adjusting the stack in a single step costs nothing extra, do so.
5855 I.e. either if a single addi is enough, or we need a movi anyway,
5856 and we don't exceed the maximum offset range (the test for the
5857 latter is conservative for simplicity). */
5859 && (CONST_OK_FOR_I10 (-total_size)
5860 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5861 && total_size <= 2044)))
5862 d_rounding = total_size - save_size;
5864 offset_base = d + d_rounding;
5866 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5869 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5870 tmp_pnt = schedule.temps;
5871 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5873 enum machine_mode mode = entry->mode;
5874 unsigned int reg = entry->reg;
5875 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5878 offset = entry->offset;
5880 reg_rtx = gen_rtx_REG (mode, reg);
5882 mem_rtx = gen_frame_mem (mode,
5883 gen_rtx_PLUS (Pmode,
5887 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5894 if (HAVE_PRE_DECREMENT
5895 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5896 || mem_rtx == NULL_RTX
5897 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5899 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5901 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5910 offset += GET_MODE_SIZE (mode);
5914 if (mem_rtx != NULL_RTX)
5917 if (offset_in_r0 == -1)
5919 emit_move_insn (r0, GEN_INT (offset));
5920 offset_in_r0 = offset;
5922 else if (offset != offset_in_r0)
5927 GEN_INT (offset - offset_in_r0)));
5928 offset_in_r0 += offset - offset_in_r0;
5931 if (pre_dec != NULL_RTX)
5937 (Pmode, r0, stack_pointer_rtx));
5941 offset -= GET_MODE_SIZE (mode);
5942 offset_in_r0 -= GET_MODE_SIZE (mode);
5947 mem_rtx = gen_frame_mem (mode, r0);
5949 mem_rtx = gen_frame_mem (mode,
5950 gen_rtx_PLUS (Pmode,
5954 /* We must not use an r0-based address for target-branch
5955 registers or for special registers without pre-dec
5956 memory addresses, since we store their values in r0
5958 gcc_assert (!TARGET_REGISTER_P (reg)
5959 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5960 || mem_rtx == pre_dec));
5963 orig_reg_rtx = reg_rtx;
5964 if (TARGET_REGISTER_P (reg)
5965 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5966 && mem_rtx != pre_dec))
5968 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5970 emit_move_insn (tmp_reg, reg_rtx);
5972 if (REGNO (tmp_reg) == R0_REG)
5976 gcc_assert (!refers_to_regno_p
5977 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5980 if (*++tmp_pnt <= 0)
5981 tmp_pnt = schedule.temps;
5988 /* Mark as interesting for dwarf cfi generator */
5989 insn = emit_move_insn (mem_rtx, reg_rtx);
5990 RTX_FRAME_RELATED_P (insn) = 1;
5991 /* If we use an intermediate register for the save, we can't
5992 describe this exactly in cfi as a copy of the to-be-saved
5993 register into the temporary register and then the temporary
5994 register on the stack, because the temporary register can
5995 have a different natural size than the to-be-saved register.
5996 Thus, we gloss over the intermediate copy and pretend we do
5997 a direct save from the to-be-saved register. */
5998 if (REGNO (reg_rtx) != reg)
6002 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6003 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6005 REG_NOTES (insn) = note_rtx;
6008 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6010 rtx reg_rtx = gen_rtx_REG (mode, reg);
6012 rtx mem_rtx = gen_frame_mem (mode,
6013 gen_rtx_PLUS (Pmode,
6017 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6018 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6020 REG_NOTES (insn) = note_rtx;
6025 gcc_assert (entry->offset == d_rounding);
6028 push_regs (&live_regs_mask, current_function_interrupt);
6030 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6032 rtx insn = get_last_insn ();
6033 rtx last = emit_insn (gen_GOTaddr2picreg ());
6035 /* Mark these insns as possibly dead. Sometimes, flow2 may
6036 delete all uses of the PIC register. In this case, let it
6037 delete the initialization too. */
6040 insn = NEXT_INSN (insn);
6042 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6046 while (insn != last);
6049 if (SHMEDIA_REGS_STACK_ADJUST ())
6051 /* This must NOT go through the PLT, otherwise mach and macl
6052 may be clobbered. */
6053 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6055 ? "__GCC_push_shmedia_regs"
6056 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6057 emit_insn (gen_shmedia_save_restore_regs_compact
6058 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6061 if (target_flags != save_flags && ! current_function_interrupt)
6063 rtx insn = emit_insn (gen_toggle_sz ());
6065 /* If we're lucky, a mode switch in the function body will
6066 overwrite fpscr, turning this insn dead. Tell flow this
6067 insn is ok to delete. */
6068 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6073 target_flags = save_flags;
6075 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6076 stack_pointer_rtx, 0, NULL);
6078 if (frame_pointer_needed)
6079 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6081 if (TARGET_SHCOMPACT
6082 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6084 /* This must NOT go through the PLT, otherwise mach and macl
6085 may be clobbered. */
6086 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6087 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6088 emit_insn (gen_shcompact_incoming_args ());
6093 sh_expand_epilogue (bool sibcall_p)
6095 HARD_REG_SET live_regs_mask;
6099 int save_flags = target_flags;
6100 int frame_size, save_size;
6101 int fpscr_deferred = 0;
6102 int e = sibcall_p ? -1 : 1;
6104 d = calc_live_regs (&live_regs_mask);
6107 frame_size = rounded_frame_size (d);
6111 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6113 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6114 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6115 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6117 total_size = d + tregs_space;
6118 total_size += rounded_frame_size (total_size);
6119 save_size = total_size - frame_size;
6121 /* If adjusting the stack in a single step costs nothing extra, do so.
6122 I.e. either if a single addi is enough, or we need a movi anyway,
6123 and we don't exceed the maximum offset range (the test for the
6124 latter is conservative for simplicity). */
6126 && ! frame_pointer_needed
6127 && (CONST_OK_FOR_I10 (total_size)
6128 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6129 && total_size <= 2044)))
6130 d_rounding = frame_size;
6132 frame_size -= d_rounding;
6135 if (frame_pointer_needed)
6137 /* We must avoid scheduling the epilogue with previous basic blocks
6138 when exception handling is enabled. See PR/18032. */
6139 if (flag_exceptions)
6140 emit_insn (gen_blockage ());
6141 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6144 /* We must avoid moving the stack pointer adjustment past code
6145 which reads from the local frame, else an interrupt could
6146 occur after the SP adjustment and clobber data in the local
6148 emit_insn (gen_blockage ());
6149 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6151 else if (frame_size)
6153 /* We must avoid moving the stack pointer adjustment past code
6154 which reads from the local frame, else an interrupt could
6155 occur after the SP adjustment and clobber data in the local
6157 emit_insn (gen_blockage ());
6158 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6161 if (SHMEDIA_REGS_STACK_ADJUST ())
6163 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6165 ? "__GCC_pop_shmedia_regs"
6166 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6167 /* This must NOT go through the PLT, otherwise mach and macl
6168 may be clobbered. */
6169 emit_insn (gen_shmedia_save_restore_regs_compact
6170 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6173 /* Pop all the registers. */
6175 if (target_flags != save_flags && ! current_function_interrupt)
6176 emit_insn (gen_toggle_sz ());
6179 int offset_base, offset;
6180 int offset_in_r0 = -1;
6182 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6183 save_schedule schedule;
6187 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6188 offset_base = -entry[1].offset + d_rounding;
6189 tmp_pnt = schedule.temps;
6190 for (; entry->mode != VOIDmode; entry--)
6192 enum machine_mode mode = entry->mode;
6193 int reg = entry->reg;
6194 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6196 offset = offset_base + entry->offset;
6197 reg_rtx = gen_rtx_REG (mode, reg);
6199 mem_rtx = gen_frame_mem (mode,
6200 gen_rtx_PLUS (Pmode,
6204 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6210 if (HAVE_POST_INCREMENT
6211 && (offset == offset_in_r0
6212 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6213 && mem_rtx == NULL_RTX)
6214 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6216 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6218 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6221 post_inc = NULL_RTX;
6230 if (mem_rtx != NULL_RTX)
6233 if (offset_in_r0 == -1)
6235 emit_move_insn (r0, GEN_INT (offset));
6236 offset_in_r0 = offset;
6238 else if (offset != offset_in_r0)
6243 GEN_INT (offset - offset_in_r0)));
6244 offset_in_r0 += offset - offset_in_r0;
6247 if (post_inc != NULL_RTX)
6253 (Pmode, r0, stack_pointer_rtx));
6259 offset_in_r0 += GET_MODE_SIZE (mode);
6262 mem_rtx = gen_frame_mem (mode, r0);
6264 mem_rtx = gen_frame_mem (mode,
6265 gen_rtx_PLUS (Pmode,
6269 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6270 || mem_rtx == post_inc);
6273 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6274 && mem_rtx != post_inc)
6276 insn = emit_move_insn (r0, mem_rtx);
6279 else if (TARGET_REGISTER_P (reg))
6281 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6283 /* Give the scheduler a bit of freedom by using up to
6284 MAX_TEMPS registers in a round-robin fashion. */
6285 insn = emit_move_insn (tmp_reg, mem_rtx);
6288 tmp_pnt = schedule.temps;
6291 insn = emit_move_insn (reg_rtx, mem_rtx);
6292 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6293 /* This is dead, unless we return with a sibcall. */
6294 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6299 gcc_assert (entry->offset + offset_base == d + d_rounding);
6301 else /* ! TARGET_SH5 */
6304 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6306 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6308 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6310 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6311 && hard_regs_intersect_p (&live_regs_mask,
6312 ®_class_contents[DF_REGS]))
6314 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6316 if (j == FIRST_FP_REG && fpscr_deferred)
6321 if (target_flags != save_flags && ! current_function_interrupt)
6322 emit_insn (gen_toggle_sz ());
6323 target_flags = save_flags;
6325 output_stack_adjust (current_function_pretend_args_size
6326 + save_size + d_rounding
6327 + current_function_args_info.stack_regs * 8,
6328 stack_pointer_rtx, e, NULL);
6330 if (current_function_calls_eh_return)
6331 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6332 EH_RETURN_STACKADJ_RTX));
6334 /* Switch back to the normal stack if necessary. */
6335 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6336 emit_insn (gen_sp_switch_2 ());
6338 /* Tell flow the insn that pops PR isn't dead. */
6339 /* PR_REG will never be live in SHmedia mode, and we don't need to
6340 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6341 by the return pattern. */
6342 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6343 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6346 static int sh_need_epilogue_known = 0;
6349 sh_need_epilogue (void)
6351 if (! sh_need_epilogue_known)
6356 sh_expand_epilogue (0);
6357 epilogue = get_insns ();
6359 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6361 return sh_need_epilogue_known > 0;
6364 /* Emit code to change the current function's return address to RA.
6365 TEMP is available as a scratch register, if needed. */
6368 sh_set_return_address (rtx ra, rtx tmp)
6370 HARD_REG_SET live_regs_mask;
6372 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6375 d = calc_live_regs (&live_regs_mask);
6377 /* If pr_reg isn't life, we can set it (or the register given in
6378 sh_media_register_for_return) directly. */
6379 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6385 int rr_regno = sh_media_register_for_return ();
6390 rr = gen_rtx_REG (DImode, rr_regno);
6393 rr = gen_rtx_REG (SImode, pr_reg);
6395 emit_insn (GEN_MOV (rr, ra));
6396 /* Tell flow the register for return isn't dead. */
6397 emit_insn (gen_rtx_USE (VOIDmode, rr));
6404 save_schedule schedule;
6407 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6408 offset = entry[1].offset;
6409 for (; entry->mode != VOIDmode; entry--)
6410 if (entry->reg == pr_reg)
6413 /* We can't find pr register. */
6417 offset = entry->offset - offset;
6418 pr_offset = (rounded_frame_size (d) + offset
6419 + SHMEDIA_REGS_STACK_ADJUST ());
6422 pr_offset = rounded_frame_size (d);
6424 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6425 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6427 tmp = gen_frame_mem (Pmode, tmp);
6428 emit_insn (GEN_MOV (tmp, ra));
6431 /* Clear variables at function end. */
6434 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6435 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6437 sh_need_epilogue_known = 0;
6441 sh_builtin_saveregs (void)
6443 /* First unnamed integer register. */
6444 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6445 /* Number of integer registers we need to save. */
6446 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6447 /* First unnamed SFmode float reg */
6448 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6449 /* Number of SFmode float regs to save. */
6450 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6453 HOST_WIDE_INT alias_set;
6459 int pushregs = n_intregs;
6461 while (pushregs < NPARM_REGS (SImode) - 1
6462 && (CALL_COOKIE_INT_REG_GET
6463 (current_function_args_info.call_cookie,
6464 NPARM_REGS (SImode) - pushregs)
6467 current_function_args_info.call_cookie
6468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6473 if (pushregs == NPARM_REGS (SImode))
6474 current_function_args_info.call_cookie
6475 |= (CALL_COOKIE_INT_REG (0, 1)
6476 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6478 current_function_args_info.call_cookie
6479 |= CALL_COOKIE_STACKSEQ (pushregs);
6481 current_function_pretend_args_size += 8 * n_intregs;
6483 if (TARGET_SHCOMPACT)
6487 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6489 error ("__builtin_saveregs not supported by this subtarget");
6496 /* Allocate block of memory for the regs. */
6497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6498 Or can assign_stack_local accept a 0 SIZE argument? */
6499 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6502 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6503 else if (n_floatregs & 1)
6507 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6508 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6509 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6510 regbuf = change_address (regbuf, BLKmode, addr);
6512 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6516 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6517 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6518 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6519 emit_insn (gen_andsi3 (addr, addr, mask));
6520 regbuf = change_address (regbuf, BLKmode, addr);
6523 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6524 alias_set = get_varargs_alias_set ();
6525 set_mem_alias_set (regbuf, alias_set);
6528 This is optimized to only save the regs that are necessary. Explicitly
6529 named args need not be saved. */
6531 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6532 adjust_address (regbuf, BLKmode,
6533 n_floatregs * UNITS_PER_WORD),
6537 /* Return the address of the regbuf. */
6538 return XEXP (regbuf, 0);
6541 This is optimized to only save the regs that are necessary. Explicitly
6542 named args need not be saved.
6543 We explicitly build a pointer to the buffer because it halves the insn
6544 count when not optimizing (otherwise the pointer is built for each reg
6546 We emit the moves in reverse order so that we can use predecrement. */
6548 fpregs = copy_to_mode_reg (Pmode,
6549 plus_constant (XEXP (regbuf, 0),
6550 n_floatregs * UNITS_PER_WORD));
6551 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6554 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6556 emit_insn (gen_addsi3 (fpregs, fpregs,
6557 GEN_INT (-2 * UNITS_PER_WORD)));
6558 mem = change_address (regbuf, DFmode, fpregs);
6559 emit_move_insn (mem,
6560 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6562 regno = first_floatreg;
6565 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6566 mem = change_address (regbuf, SFmode, fpregs);
6567 emit_move_insn (mem,
6568 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6569 - (TARGET_LITTLE_ENDIAN != 0)));
6573 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6577 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6578 mem = change_address (regbuf, SFmode, fpregs);
6579 emit_move_insn (mem,
6580 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6583 /* Return the address of the regbuf. */
6584 return XEXP (regbuf, 0);
6587 /* Define the `__builtin_va_list' type for the ABI. */
6590 sh_build_builtin_va_list (void)
6592 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6595 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6596 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6597 return ptr_type_node;
6599 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6601 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6603 f_next_o_limit = build_decl (FIELD_DECL,
6604 get_identifier ("__va_next_o_limit"),
6606 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6608 f_next_fp_limit = build_decl (FIELD_DECL,
6609 get_identifier ("__va_next_fp_limit"),
6611 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6614 DECL_FIELD_CONTEXT (f_next_o) = record;
6615 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6616 DECL_FIELD_CONTEXT (f_next_fp) = record;
6617 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6618 DECL_FIELD_CONTEXT (f_next_stack) = record;
6620 TYPE_FIELDS (record) = f_next_o;
6621 TREE_CHAIN (f_next_o) = f_next_o_limit;
6622 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6623 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6624 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6626 layout_type (record);
6631 /* Implement `va_start' for varargs and stdarg. */
6634 sh_va_start (tree valist, rtx nextarg)
6636 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6637 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6643 expand_builtin_saveregs ();
6644 std_expand_builtin_va_start (valist, nextarg);
6648 if ((! TARGET_SH2E && ! TARGET_SH4)
6649 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6651 std_expand_builtin_va_start (valist, nextarg);
6655 f_next_o = TYPE_FIELDS (va_list_type_node);
6656 f_next_o_limit = TREE_CHAIN (f_next_o);
6657 f_next_fp = TREE_CHAIN (f_next_o_limit);
6658 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6659 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6661 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6663 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6664 valist, f_next_o_limit, NULL_TREE);
6665 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6667 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6668 valist, f_next_fp_limit, NULL_TREE);
6669 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6670 valist, f_next_stack, NULL_TREE);
6672 /* Call __builtin_saveregs. */
6673 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6674 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6675 TREE_SIDE_EFFECTS (t) = 1;
6676 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6678 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6683 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6684 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6685 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6686 TREE_SIDE_EFFECTS (t) = 1;
6687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6689 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6690 TREE_SIDE_EFFECTS (t) = 1;
6691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6693 nint = current_function_args_info.arg_count[SH_ARG_INT];
6698 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6699 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6700 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6701 TREE_SIDE_EFFECTS (t) = 1;
6702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6704 u = make_tree (ptr_type_node, nextarg);
6705 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6706 TREE_SIDE_EFFECTS (t) = 1;
6707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6710 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6711 member, return it. */
6713 find_sole_member (tree type)
6715 tree field, member = NULL_TREE;
6717 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6719 if (TREE_CODE (field) != FIELD_DECL)
6721 if (!DECL_SIZE (field))
6723 if (integer_zerop (DECL_SIZE (field)))
6731 /* Implement `va_arg'. */
6734 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6735 tree *post_p ATTRIBUTE_UNUSED)
6737 HOST_WIDE_INT size, rsize;
6738 tree tmp, pptr_type_node;
6739 tree addr, lab_over = NULL, result = NULL;
6740 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6744 type = build_pointer_type (type);
6746 size = int_size_in_bytes (type);
6747 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6748 pptr_type_node = build_pointer_type (ptr_type_node);
6750 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6751 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6753 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6754 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6759 f_next_o = TYPE_FIELDS (va_list_type_node);
6760 f_next_o_limit = TREE_CHAIN (f_next_o);
6761 f_next_fp = TREE_CHAIN (f_next_o_limit);
6762 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6763 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6765 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6767 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6768 valist, f_next_o_limit, NULL_TREE);
6769 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6770 valist, f_next_fp, NULL_TREE);
6771 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6772 valist, f_next_fp_limit, NULL_TREE);
6773 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6774 valist, f_next_stack, NULL_TREE);
6776 /* Structures with a single member with a distinct mode are passed
6777 like their member. This is relevant if the latter has a REAL_TYPE
6778 or COMPLEX_TYPE type. */
6780 while (TREE_CODE (eff_type) == RECORD_TYPE
6781 && (member = find_sole_member (eff_type))
6782 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6783 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6784 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6786 tree field_type = TREE_TYPE (member);
6788 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6789 eff_type = field_type;
6792 gcc_assert ((TYPE_ALIGN (eff_type)
6793 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6794 || (TYPE_ALIGN (eff_type)
6795 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6802 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6803 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6804 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6809 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6812 addr = create_tmp_var (pptr_type_node, NULL);
6813 lab_false = create_artificial_label ();
6814 lab_over = create_artificial_label ();
6816 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6820 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6822 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6824 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6825 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6826 gimplify_and_add (tmp, pre_p);
6828 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6829 gimplify_and_add (tmp, pre_p);
6830 tmp = next_fp_limit;
6831 if (size > 4 && !is_double)
6832 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6833 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6834 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6835 cmp = build3 (COND_EXPR, void_type_node, tmp,
6836 build1 (GOTO_EXPR, void_type_node, lab_false),
6839 gimplify_and_add (cmp, pre_p);
6841 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6842 || (is_double || size == 16))
6844 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6845 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6846 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6847 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6848 gimplify_and_add (tmp, pre_p);
6851 gimplify_and_add (cmp, pre_p);
6853 #ifdef FUNCTION_ARG_SCmode_WART
6854 if (TYPE_MODE (eff_type) == SCmode
6855 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6857 tree subtype = TREE_TYPE (eff_type);
6861 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6862 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6865 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6866 real = get_initialized_tmp_var (real, pre_p, NULL);
6868 result = build2 (COMPLEX_EXPR, type, real, imag);
6869 result = get_initialized_tmp_var (result, pre_p, NULL);
6871 #endif /* FUNCTION_ARG_SCmode_WART */
6873 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6874 gimplify_and_add (tmp, pre_p);
6876 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6877 gimplify_and_add (tmp, pre_p);
6879 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6880 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6881 gimplify_and_add (tmp, pre_p);
6882 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6883 gimplify_and_add (tmp, pre_p);
6885 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6886 gimplify_and_add (tmp, post_p);
6887 valist = next_fp_tmp;
6891 tmp = fold_convert (ptr_type_node, size_int (rsize));
6892 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
6893 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6894 tmp = build3 (COND_EXPR, void_type_node, tmp,
6895 build1 (GOTO_EXPR, void_type_node, lab_false),
6897 gimplify_and_add (tmp, pre_p);
6899 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6900 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6901 gimplify_and_add (tmp, pre_p);
6903 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6904 gimplify_and_add (tmp, pre_p);
6906 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6907 gimplify_and_add (tmp, pre_p);
6909 if (size > 4 && ! TARGET_SH4)
6911 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6912 gimplify_and_add (tmp, pre_p);
6915 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6916 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6917 gimplify_and_add (tmp, pre_p);
6922 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6923 gimplify_and_add (tmp, pre_p);
6927 /* ??? In va-sh.h, there had been code to make values larger than
6928 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6930 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6933 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
6934 gimplify_and_add (tmp, pre_p);
6936 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6937 gimplify_and_add (tmp, pre_p);
6943 result = build_va_arg_indirect_ref (result);
6949 sh_promote_prototypes (tree type)
6955 return ! sh_attr_renesas_p (type);
6958 /* Whether an argument must be passed by reference. On SHcompact, we
6959 pretend arguments wider than 32-bits that would have been passed in
6960 registers are passed by reference, so that an SHmedia trampoline
6961 loads them into the full 64-bits registers. */
6964 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6965 tree type, bool named)
6967 unsigned HOST_WIDE_INT size;
6970 size = int_size_in_bytes (type);
6972 size = GET_MODE_SIZE (mode);
6974 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6976 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6977 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6978 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6980 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6981 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6988 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6989 tree type, bool named)
6991 if (targetm.calls.must_pass_in_stack (mode, type))
6994 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6995 wants to know about pass-by-reference semantics for incoming
7000 if (TARGET_SHCOMPACT)
7002 cum->byref = shcompact_byref (cum, mode, type, named);
7003 return cum->byref != 0;
7010 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7011 tree type, bool named ATTRIBUTE_UNUSED)
7013 /* ??? How can it possibly be correct to return true only on the
7014 caller side of the equation? Is there someplace else in the
7015 sh backend that's magically producing the copies? */
7016 return (cum->outgoing
7017 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7018 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7022 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7023 tree type, bool named ATTRIBUTE_UNUSED)
7028 && PASS_IN_REG_P (*cum, mode, type)
7029 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7030 && (ROUND_REG (*cum, mode)
7032 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7033 : ROUND_ADVANCE (int_size_in_bytes (type)))
7034 > NPARM_REGS (mode)))
7035 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7037 else if (!TARGET_SHCOMPACT
7038 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7039 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7041 return words * UNITS_PER_WORD;
7045 /* Define where to put the arguments to a function.
7046 Value is zero to push the argument on the stack,
7047 or a hard register in which to store the argument.
7049 MODE is the argument's machine mode.
7050 TYPE is the data type of the argument (as a tree).
7051 This is null for libcalls where that information may
7053 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7054 the preceding args and about the function being called.
7055 NAMED is nonzero if this argument is a named parameter
7056 (otherwise it is an extra parameter matching an ellipsis).
7058 On SH the first args are normally in registers
7059 and the rest are pushed. Any arg that starts within the first
7060 NPARM_REGS words is at least partially passed in a register unless
7061 its data type forbids. */
7065 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7066 tree type, int named)
7068 if (! TARGET_SH5 && mode == VOIDmode)
7069 return GEN_INT (ca->renesas_abi ? 1 : 0);
7072 && PASS_IN_REG_P (*ca, mode, type)
7073 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7077 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7078 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7080 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7081 gen_rtx_REG (SFmode,
7083 + (ROUND_REG (*ca, mode) ^ 1)),
7085 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7086 gen_rtx_REG (SFmode,
7088 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7090 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7093 /* If the alignment of a DF value causes an SF register to be
7094 skipped, we will use that skipped register for the next SF
7096 if ((TARGET_HITACHI || ca->renesas_abi)
7097 && ca->free_single_fp_reg
7099 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7101 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7102 ^ (mode == SFmode && TARGET_SH4
7103 && TARGET_LITTLE_ENDIAN != 0
7104 && ! TARGET_HITACHI && ! ca->renesas_abi);
7105 return gen_rtx_REG (mode, regno);
7111 if (mode == VOIDmode && TARGET_SHCOMPACT)
7112 return GEN_INT (ca->call_cookie);
7114 /* The following test assumes unnamed arguments are promoted to
7116 if (mode == SFmode && ca->free_single_fp_reg)
7117 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7119 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7120 && (named || ! ca->prototype_p)
7121 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7123 if (! ca->prototype_p && TARGET_SHMEDIA)
7124 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7126 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7128 + ca->arg_count[(int) SH_ARG_FLOAT]);
7131 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7132 && (! TARGET_SHCOMPACT
7133 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7134 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7137 return gen_rtx_REG (mode, (FIRST_PARM_REG
7138 + ca->arg_count[(int) SH_ARG_INT]));
7147 /* Update the data in CUM to advance over an argument
7148 of mode MODE and data type TYPE.
7149 (TYPE is null for libcalls where that information may not be
7153 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7154 tree type, int named)
7158 else if (TARGET_SH5)
7160 tree type2 = (ca->byref && type
7163 enum machine_mode mode2 = (ca->byref && type
7166 int dwords = ((ca->byref
7169 ? int_size_in_bytes (type2)
7170 : GET_MODE_SIZE (mode2)) + 7) / 8;
7171 int numregs = MIN (dwords, NPARM_REGS (SImode)
7172 - ca->arg_count[(int) SH_ARG_INT]);
7176 ca->arg_count[(int) SH_ARG_INT] += numregs;
7177 if (TARGET_SHCOMPACT
7178 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7181 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7183 /* N.B. We want this also for outgoing. */
7184 ca->stack_regs += numregs;
7189 ca->stack_regs += numregs;
7190 ca->byref_regs += numregs;
7194 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7198 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7201 else if (dwords > numregs)
7203 int pushregs = numregs;
7205 if (TARGET_SHCOMPACT)
7206 ca->stack_regs += numregs;
7207 while (pushregs < NPARM_REGS (SImode) - 1
7208 && (CALL_COOKIE_INT_REG_GET
7210 NPARM_REGS (SImode) - pushregs)
7214 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7218 if (numregs == NPARM_REGS (SImode))
7220 |= CALL_COOKIE_INT_REG (0, 1)
7221 | CALL_COOKIE_STACKSEQ (numregs - 1);
7224 |= CALL_COOKIE_STACKSEQ (numregs);
7227 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7228 && (named || ! ca->prototype_p))
7230 if (mode2 == SFmode && ca->free_single_fp_reg)
7231 ca->free_single_fp_reg = 0;
7232 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7233 < NPARM_REGS (SFmode))
7236 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7238 - ca->arg_count[(int) SH_ARG_FLOAT]);
7240 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7242 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7244 if (ca->outgoing && numregs > 0)
7248 |= (CALL_COOKIE_INT_REG
7249 (ca->arg_count[(int) SH_ARG_INT]
7250 - numregs + ((numfpregs - 2) / 2),
7251 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7254 while (numfpregs -= 2);
7256 else if (mode2 == SFmode && (named)
7257 && (ca->arg_count[(int) SH_ARG_FLOAT]
7258 < NPARM_REGS (SFmode)))
7259 ca->free_single_fp_reg
7260 = FIRST_FP_PARM_REG - numfpregs
7261 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7267 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7269 /* Note that we've used the skipped register. */
7270 if (mode == SFmode && ca->free_single_fp_reg)
7272 ca->free_single_fp_reg = 0;
7275 /* When we have a DF after an SF, there's an SF register that get
7276 skipped in order to align the DF value. We note this skipped
7277 register, because the next SF value will use it, and not the
7278 SF that follows the DF. */
7280 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7282 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7283 + BASE_ARG_REG (mode));
7287 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7288 || PASS_IN_REG_P (*ca, mode, type))
7289 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7290 = (ROUND_REG (*ca, mode)
7292 ? ROUND_ADVANCE (int_size_in_bytes (type))
7293 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7296 /* The Renesas calling convention doesn't quite fit into this scheme since
7297 the address is passed like an invisible argument, but one that is always
7298 passed in memory. */
7300 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7302 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7304 return gen_rtx_REG (Pmode, 2);
7307 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7310 sh_return_in_memory (tree type, tree fndecl)
7314 if (TYPE_MODE (type) == BLKmode)
7315 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7317 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7321 return (TYPE_MODE (type) == BLKmode
7322 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7323 && TREE_CODE (type) == RECORD_TYPE));
7327 /* We actually emit the code in sh_expand_prologue. We used to use
7328 a static variable to flag that we need to emit this code, but that
7329 doesn't when inlining, when functions are deferred and then emitted
7330 later. Fortunately, we already have two flags that are part of struct
7331 function that tell if a function uses varargs or stdarg. */
7333 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7334 enum machine_mode mode,
7336 int *pretend_arg_size,
7337 int second_time ATTRIBUTE_UNUSED)
7339 gcc_assert (current_function_stdarg);
7340 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7342 int named_parm_regs, anon_parm_regs;
7344 named_parm_regs = (ROUND_REG (*ca, mode)
7346 ? ROUND_ADVANCE (int_size_in_bytes (type))
7347 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7348 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7349 if (anon_parm_regs > 0)
7350 *pretend_arg_size = anon_parm_regs * 4;
7355 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7361 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7363 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7367 /* Define the offset between two registers, one to be eliminated, and
7368 the other its replacement, at the start of a routine. */
7371 initial_elimination_offset (int from, int to)
7374 int regs_saved_rounding = 0;
7375 int total_saved_regs_space;
7376 int total_auto_space;
7377 int save_flags = target_flags;
7379 HARD_REG_SET live_regs_mask;
7381 shmedia_space_reserved_for_target_registers = false;
7382 regs_saved = calc_live_regs (&live_regs_mask);
7383 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7385 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7387 shmedia_space_reserved_for_target_registers = true;
7388 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7391 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7392 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7393 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7395 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7396 copy_flags = target_flags;
7397 target_flags = save_flags;
7399 total_saved_regs_space = regs_saved + regs_saved_rounding;
7401 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7402 return total_saved_regs_space + total_auto_space
7403 + current_function_args_info.byref_regs * 8;
7405 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7406 return total_saved_regs_space + total_auto_space
7407 + current_function_args_info.byref_regs * 8;
7409 /* Initial gap between fp and sp is 0. */
7410 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7413 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7414 return rounded_frame_size (0);
7416 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7417 return rounded_frame_size (0);
7419 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7420 && (to == HARD_FRAME_POINTER_REGNUM
7421 || to == STACK_POINTER_REGNUM));
7424 int n = total_saved_regs_space;
7425 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7426 save_schedule schedule;
7429 n += total_auto_space;
7431 /* If it wasn't saved, there's not much we can do. */
7432 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7435 target_flags = copy_flags;
7437 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7438 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7439 if (entry->reg == pr_reg)
7441 target_flags = save_flags;
7442 return entry->offset;
7447 return total_auto_space;
7450 /* Insert any deferred function attributes from earlier pragmas. */
7452 sh_insert_attributes (tree node, tree *attributes)
7456 if (TREE_CODE (node) != FUNCTION_DECL)
7459 /* We are only interested in fields. */
7463 /* Append the attributes to the deferred attributes. */
7464 *sh_deferred_function_attributes_tail = *attributes;
7465 attrs = sh_deferred_function_attributes;
7469 /* Some attributes imply or require the interrupt attribute. */
7470 if (!lookup_attribute ("interrupt_handler", attrs)
7471 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7473 /* If we have a trapa_handler, but no interrupt_handler attribute,
7474 insert an interrupt_handler attribute. */
7475 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7476 /* We can't use sh_pr_interrupt here because that's not in the
7479 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7480 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7481 interrupt attribute is missing, we ignore the attribute and warn. */
7482 else if (lookup_attribute ("sp_switch", attrs)
7483 || lookup_attribute ("trap_exit", attrs)
7484 || lookup_attribute ("nosave_low_regs", attrs))
7488 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7490 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7491 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7492 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7493 warning (OPT_Wattributes,
7494 "%qs attribute only applies to interrupt functions",
7495 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7498 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7500 tail = &TREE_CHAIN (*tail);
7503 attrs = *attributes;
7507 /* Install the processed list. */
7508 *attributes = attrs;
7510 /* Clear deferred attributes. */
7511 sh_deferred_function_attributes = NULL_TREE;
7512 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7517 /* Supported attributes:
7519 interrupt_handler -- specifies this function is an interrupt handler.
7521 trapa_handler - like above, but don't save all registers.
7523 sp_switch -- specifies an alternate stack for an interrupt handler
7526 trap_exit -- use a trapa to exit an interrupt function instead of
7529 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7530 This is useful on the SH3 and upwards,
7531 which has a separate set of low regs for User and Supervisor modes.
7532 This should only be used for the lowest level of interrupts. Higher levels
7533 of interrupts must save the registers in case they themselves are
7536 renesas -- use Renesas calling/layout conventions (functions and
7541 const struct attribute_spec sh_attribute_table[] =
7543 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7544 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7545 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7546 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7547 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7548 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7549 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7551 /* Symbian support adds three new attributes:
7552 dllexport - for exporting a function/variable that will live in a dll
7553 dllimport - for importing a function/variable from a dll
7555 Microsoft allows multiple declspecs in one __declspec, separating
7556 them with spaces. We do NOT support this. Instead, use __declspec
7558 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7559 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7561 { NULL, 0, 0, false, false, false, NULL }
7564 /* Handle an "interrupt_handler" attribute; arguments as in
7565 struct attribute_spec.handler. */
7567 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7568 tree args ATTRIBUTE_UNUSED,
7569 int flags ATTRIBUTE_UNUSED,
7572 if (TREE_CODE (*node) != FUNCTION_DECL)
7574 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7575 IDENTIFIER_POINTER (name));
7576 *no_add_attrs = true;
7578 else if (TARGET_SHCOMPACT)
7580 error ("attribute interrupt_handler is not compatible with -m5-compact");
7581 *no_add_attrs = true;
7587 /* Handle an "sp_switch" attribute; arguments as in
7588 struct attribute_spec.handler. */
7590 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7591 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7593 if (TREE_CODE (*node) != FUNCTION_DECL)
7595 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7596 IDENTIFIER_POINTER (name));
7597 *no_add_attrs = true;
7599 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7601 /* The argument must be a constant string. */
7602 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7603 IDENTIFIER_POINTER (name));
7604 *no_add_attrs = true;
7610 /* Handle an "trap_exit" attribute; arguments as in
7611 struct attribute_spec.handler. */
7613 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7614 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7616 if (TREE_CODE (*node) != FUNCTION_DECL)
7618 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7619 IDENTIFIER_POINTER (name));
7620 *no_add_attrs = true;
7622 /* The argument specifies a trap number to be used in a trapa instruction
7623 at function exit (instead of an rte instruction). */
7624 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7626 /* The argument must be a constant integer. */
7627 warning (OPT_Wattributes, "%qs attribute argument not an "
7628 "integer constant", IDENTIFIER_POINTER (name));
7629 *no_add_attrs = true;
7636 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7637 tree name ATTRIBUTE_UNUSED,
7638 tree args ATTRIBUTE_UNUSED,
7639 int flags ATTRIBUTE_UNUSED,
7640 bool *no_add_attrs ATTRIBUTE_UNUSED)
7645 /* True if __attribute__((renesas)) or -mrenesas. */
7647 sh_attr_renesas_p (tree td)
7654 td = TREE_TYPE (td);
7655 if (td == error_mark_node)
7657 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7661 /* True if __attribute__((renesas)) or -mrenesas, for the current
7664 sh_cfun_attr_renesas_p (void)
7666 return sh_attr_renesas_p (current_function_decl);
7670 sh_cfun_interrupt_handler_p (void)
7672 return (lookup_attribute ("interrupt_handler",
7673 DECL_ATTRIBUTES (current_function_decl))
7677 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7680 sh_check_pch_target_flags (int old_flags)
7682 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7683 | MASK_SH_E | MASK_HARD_SH4
7684 | MASK_FPU_SINGLE | MASK_SH4))
7685 return _("created and used with different architectures / ABIs");
7686 if ((old_flags ^ target_flags) & MASK_HITACHI)
7687 return _("created and used with different ABIs");
7688 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7689 return _("created and used with different endianness");
7693 /* Predicates used by the templates. */
7695 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7696 Used only in general_movsrc_operand. */
7699 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7711 /* Nonzero if OP is a floating point value with value 0.0. */
7714 fp_zero_operand (rtx op)
7718 if (GET_MODE (op) != SFmode)
7721 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7722 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7725 /* Nonzero if OP is a floating point value with value 1.0. */
7728 fp_one_operand (rtx op)
7732 if (GET_MODE (op) != SFmode)
7735 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7736 return REAL_VALUES_EQUAL (r, dconst1);
7739 /* For -m4 and -m4-single-only, mode switching is used. If we are
7740 compiling without -mfmovd, movsf_ie isn't taken into account for
7741 mode switching. We could check in machine_dependent_reorg for
7742 cases where we know we are in single precision mode, but there is
7743 interface to find that out during reload, so we must avoid
7744 choosing an fldi alternative during reload and thus failing to
7745 allocate a scratch register for the constant loading. */
7749 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7753 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7755 enum rtx_code code = GET_CODE (op);
7756 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7759 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7761 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7763 if (GET_CODE (op) != SYMBOL_REF)
7765 return SYMBOL_REF_TLS_MODEL (op);
7768 /* Return the destination address of a branch. */
7771 branch_dest (rtx branch)
7773 rtx dest = SET_SRC (PATTERN (branch));
7776 if (GET_CODE (dest) == IF_THEN_ELSE)
7777 dest = XEXP (dest, 1);
7778 dest = XEXP (dest, 0);
7779 dest_uid = INSN_UID (dest);
7780 return INSN_ADDRESSES (dest_uid);
7783 /* Return nonzero if REG is not used after INSN.
7784 We assume REG is a reload reg, and therefore does
7785 not live past labels. It may live past calls or jumps though. */
7787 reg_unused_after (rtx reg, rtx insn)
7792 /* If the reg is set by this instruction, then it is safe for our
7793 case. Disregard the case where this is a store to memory, since
7794 we are checking a register used in the store address. */
7795 set = single_set (insn);
7796 if (set && GET_CODE (SET_DEST (set)) != MEM
7797 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7800 while ((insn = NEXT_INSN (insn)))
7806 code = GET_CODE (insn);
7809 /* If this is a label that existed before reload, then the register
7810 if dead here. However, if this is a label added by reorg, then
7811 the register may still be live here. We can't tell the difference,
7812 so we just ignore labels completely. */
7813 if (code == CODE_LABEL)
7818 if (code == JUMP_INSN)
7821 /* If this is a sequence, we must handle them all at once.
7822 We could have for instance a call that sets the target register,
7823 and an insn in a delay slot that uses the register. In this case,
7824 we must return 0. */
7825 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7830 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7832 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7833 rtx set = single_set (this_insn);
7835 if (GET_CODE (this_insn) == CALL_INSN)
7837 else if (GET_CODE (this_insn) == JUMP_INSN)
7839 if (INSN_ANNULLED_BRANCH_P (this_insn))
7844 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7846 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7848 if (GET_CODE (SET_DEST (set)) != MEM)
7854 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7859 else if (code == JUMP_INSN)
7863 set = single_set (insn);
7864 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7866 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7867 return GET_CODE (SET_DEST (set)) != MEM;
7868 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7871 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7879 static GTY(()) rtx fpscr_rtx;
7881 get_fpscr_rtx (void)
7885 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7886 REG_USERVAR_P (fpscr_rtx) = 1;
7887 mark_user_reg (fpscr_rtx);
7889 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7890 mark_user_reg (fpscr_rtx);
7894 static GTY(()) tree fpscr_values;
7897 emit_fpu_switch (rtx scratch, int index)
7901 if (fpscr_values == NULL)
7905 t = build_index_type (integer_one_node);
7906 t = build_array_type (integer_type_node, t);
7907 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7908 DECL_ARTIFICIAL (t) = 1;
7909 DECL_IGNORED_P (t) = 1;
7910 DECL_EXTERNAL (t) = 1;
7911 TREE_STATIC (t) = 1;
7912 TREE_PUBLIC (t) = 1;
7918 src = DECL_RTL (fpscr_values);
7921 emit_move_insn (scratch, XEXP (src, 0));
7923 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7924 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7927 src = adjust_address (src, PSImode, index * 4);
7929 dst = get_fpscr_rtx ();
7930 emit_move_insn (dst, src);
7934 emit_sf_insn (rtx pat)
7940 emit_df_insn (rtx pat)
7946 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7948 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7952 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7954 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7959 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7961 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7965 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7967 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7971 /* ??? gcc does flow analysis strictly after common subexpression
7972 elimination. As a result, common subexpression elimination fails
7973 when there are some intervening statements setting the same register.
7974 If we did nothing about this, this would hurt the precision switching
7975 for SH4 badly. There is some cse after reload, but it is unable to
7976 undo the extra register pressure from the unused instructions, and
7977 it cannot remove auto-increment loads.
7979 A C code example that shows this flow/cse weakness for (at least) SH
7980 and sparc (as of gcc ss-970706) is this:
7994 So we add another pass before common subexpression elimination, to
7995 remove assignments that are dead due to a following assignment in the
7996 same basic block. */
7999 mark_use (rtx x, rtx *reg_set_block)
8005 code = GET_CODE (x);
8010 int regno = REGNO (x);
8011 int nregs = (regno < FIRST_PSEUDO_REGISTER
8012 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8016 reg_set_block[regno + nregs - 1] = 0;
8023 rtx dest = SET_DEST (x);
8025 if (GET_CODE (dest) == SUBREG)
8026 dest = SUBREG_REG (dest);
8027 if (GET_CODE (dest) != REG)
8028 mark_use (dest, reg_set_block);
8029 mark_use (SET_SRC (x), reg_set_block);
8036 const char *fmt = GET_RTX_FORMAT (code);
8038 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8041 mark_use (XEXP (x, i), reg_set_block);
8042 else if (fmt[i] == 'E')
8043 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8044 mark_use (XVECEXP (x, i, j), reg_set_block);
8051 static rtx get_free_reg (HARD_REG_SET);
8053 /* This function returns a register to use to load the address to load
8054 the fpscr from. Currently it always returns r1 or r7, but when we are
8055 able to use pseudo registers after combine, or have a better mechanism
8056 for choosing a register, it should be done here. */
8057 /* REGS_LIVE is the liveness information for the point for which we
8058 need this allocation. In some bare-bones exit blocks, r1 is live at the
8059 start. We can even have all of r0..r3 being live:
8060 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8061 INSN before which new insns are placed with will clobber the register
8062 we return. If a basic block consists only of setting the return value
8063 register to a pseudo and using that register, the return value is not
8064 live before or after this block, yet we we'll insert our insns right in
8068 get_free_reg (HARD_REG_SET regs_live)
8070 if (! TEST_HARD_REG_BIT (regs_live, 1))
8071 return gen_rtx_REG (Pmode, 1);
8073 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8074 there shouldn't be anything but a jump before the function end. */
8075 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8076 return gen_rtx_REG (Pmode, 7);
8079 /* This function will set the fpscr from memory.
8080 MODE is the mode we are setting it to. */
8082 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8084 enum attr_fp_mode fp_mode = mode;
8085 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8086 rtx addr_reg = get_free_reg (regs_live);
8088 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8091 /* Is the given character a logical line separator for the assembler? */
8092 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8093 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8097 sh_insn_length_adjustment (rtx insn)
8099 /* Instructions with unfilled delay slots take up an extra two bytes for
8100 the nop in the delay slot. */
8101 if (((GET_CODE (insn) == INSN
8102 && GET_CODE (PATTERN (insn)) != USE
8103 && GET_CODE (PATTERN (insn)) != CLOBBER)
8104 || GET_CODE (insn) == CALL_INSN
8105 || (GET_CODE (insn) == JUMP_INSN
8106 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8107 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8108 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8109 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8112 /* SH2e has a bug that prevents the use of annulled branches, so if
8113 the delay slot is not filled, we'll have to put a NOP in it. */
8114 if (sh_cpu == CPU_SH2E
8115 && GET_CODE (insn) == JUMP_INSN
8116 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8117 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8118 && get_attr_type (insn) == TYPE_CBRANCH
8119 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8122 /* sh-dsp parallel processing insn take four bytes instead of two. */
8124 if (GET_CODE (insn) == INSN)
8127 rtx body = PATTERN (insn);
8128 const char *template;
8130 int maybe_label = 1;
8132 if (GET_CODE (body) == ASM_INPUT)
8133 template = XSTR (body, 0);
8134 else if (asm_noperands (body) >= 0)
8136 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8145 while (c == ' ' || c == '\t');
8146 /* all sh-dsp parallel-processing insns start with p.
8147 The only non-ppi sh insn starting with p is pref.
8148 The only ppi starting with pr is prnd. */
8149 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8151 /* The repeat pseudo-insn expands two three insns, a total of
8152 six bytes in size. */
8153 else if ((c == 'r' || c == 'R')
8154 && ! strncasecmp ("epeat", template, 5))
8156 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8158 /* If this is a label, it is obviously not a ppi insn. */
8159 if (c == ':' && maybe_label)
8164 else if (c == '\'' || c == '"')
8169 maybe_label = c != ':';
8177 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8178 isn't protected by a PIC unspec. */
8180 nonpic_symbol_mentioned_p (rtx x)
8182 register const char *fmt;
8185 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8186 || GET_CODE (x) == PC)
8189 /* We don't want to look into the possible MEM location of a
8190 CONST_DOUBLE, since we're not going to use it, in general. */
8191 if (GET_CODE (x) == CONST_DOUBLE)
8194 if (GET_CODE (x) == UNSPEC
8195 && (XINT (x, 1) == UNSPEC_PIC
8196 || XINT (x, 1) == UNSPEC_GOT
8197 || XINT (x, 1) == UNSPEC_GOTOFF
8198 || XINT (x, 1) == UNSPEC_GOTPLT
8199 || XINT (x, 1) == UNSPEC_GOTTPOFF
8200 || XINT (x, 1) == UNSPEC_DTPOFF
8201 || XINT (x, 1) == UNSPEC_PLT))
8204 fmt = GET_RTX_FORMAT (GET_CODE (x));
8205 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8211 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8212 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8215 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8222 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8223 @GOTOFF in `reg'. */
8225 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8228 if (tls_symbolic_operand (orig, Pmode))
8231 if (GET_CODE (orig) == LABEL_REF
8232 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8235 reg = gen_reg_rtx (Pmode);
8237 emit_insn (gen_symGOTOFF2reg (reg, orig));
8240 else if (GET_CODE (orig) == SYMBOL_REF)
8243 reg = gen_reg_rtx (Pmode);
8245 emit_insn (gen_symGOT2reg (reg, orig));
8251 /* Mark the use of a constant in the literal table. If the constant
8252 has multiple labels, make it unique. */
8254 mark_constant_pool_use (rtx x)
8256 rtx insn, lab, pattern;
8261 switch (GET_CODE (x))
8271 /* Get the first label in the list of labels for the same constant
8272 and delete another labels in the list. */
8274 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8276 if (GET_CODE (insn) != CODE_LABEL
8277 || LABEL_REFS (insn) != NEXT_INSN (insn))
8282 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8283 INSN_DELETED_P (insn) = 1;
8285 /* Mark constants in a window. */
8286 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8288 if (GET_CODE (insn) != INSN)
8291 pattern = PATTERN (insn);
8292 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8295 switch (XINT (pattern, 1))
8297 case UNSPECV_CONST2:
8298 case UNSPECV_CONST4:
8299 case UNSPECV_CONST8:
8300 XVECEXP (pattern, 0, 1) = const1_rtx;
8302 case UNSPECV_WINDOW_END:
8303 if (XVECEXP (pattern, 0, 0) == x)
8306 case UNSPECV_CONST_END:
8316 /* Return true if it's possible to redirect BRANCH1 to the destination
8317 of an unconditional jump BRANCH2. We only want to do this if the
8318 resulting branch will have a short displacement. */
8320 sh_can_redirect_branch (rtx branch1, rtx branch2)
8322 if (flag_expensive_optimizations && simplejump_p (branch2))
8324 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8328 for (distance = 0, insn = NEXT_INSN (branch1);
8329 insn && distance < 256;
8330 insn = PREV_INSN (insn))
8335 distance += get_attr_length (insn);
8337 for (distance = 0, insn = NEXT_INSN (branch1);
8338 insn && distance < 256;
8339 insn = NEXT_INSN (insn))
8344 distance += get_attr_length (insn);
8350 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8352 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8353 unsigned int new_reg)
8355 /* Interrupt functions can only use registers that have already been
8356 saved by the prologue, even if they would normally be
8359 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8365 /* Function to update the integer COST
8366 based on the relationship between INSN that is dependent on
8367 DEP_INSN through the dependence LINK. The default is to make no
8368 adjustment to COST. This can be used for example to specify to
8369 the scheduler that an output- or anti-dependence does not incur
8370 the same cost as a data-dependence. The return value should be
8371 the new value for COST. */
8373 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8379 /* On SHmedia, if the dependence is an anti-dependence or
8380 output-dependence, there is no cost. */
8381 if (REG_NOTE_KIND (link) != 0)
8383 /* However, dependencies between target register loads and
8384 uses of the register in a subsequent block that are separated
8385 by a conditional branch are not modelled - we have to do with
8386 the anti-dependency between the target register load and the
8387 conditional branch that ends the current block. */
8388 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8389 && GET_CODE (PATTERN (dep_insn)) == SET
8390 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8391 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8392 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8394 int orig_cost = cost;
8395 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8396 rtx target = ((! note
8397 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8398 ? insn : JUMP_LABEL (insn));
8399 /* On the likely path, the branch costs 1, on the unlikely path,
8403 target = next_active_insn (target);
8404 while (target && ! flow_dependent_p (target, dep_insn)
8406 /* If two branches are executed in immediate succession, with the
8407 first branch properly predicted, this causes a stall at the
8408 second branch, hence we won't need the target for the
8409 second branch for two cycles after the launch of the first
8411 if (cost > orig_cost - 2)
8412 cost = orig_cost - 2;
8418 else if (get_attr_is_mac_media (insn)
8419 && get_attr_is_mac_media (dep_insn))
8422 else if (! reload_completed
8423 && GET_CODE (PATTERN (insn)) == SET
8424 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8425 && GET_CODE (PATTERN (dep_insn)) == SET
8426 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8429 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8430 that is needed at the target. */
8431 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8432 && ! flow_dependent_p (insn, dep_insn))
8435 else if (REG_NOTE_KIND (link) == 0)
8437 enum attr_type dep_type, type;
8439 if (recog_memoized (insn) < 0
8440 || recog_memoized (dep_insn) < 0)
8443 dep_type = get_attr_type (dep_insn);
8444 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8446 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8447 && (type = get_attr_type (insn)) != TYPE_CALL
8448 && type != TYPE_SFUNC)
8451 /* The only input for a call that is timing-critical is the
8452 function's address. */
8453 if (GET_CODE(insn) == CALL_INSN)
8455 rtx call = PATTERN (insn);
8457 if (GET_CODE (call) == PARALLEL)
8458 call = XVECEXP (call, 0 ,0);
8459 if (GET_CODE (call) == SET)
8460 call = SET_SRC (call);
8461 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8462 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8463 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8464 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8467 /* Likewise, the most timing critical input for an sfuncs call
8468 is the function address. However, sfuncs typically start
8469 using their arguments pretty quickly.
8470 Assume a four cycle delay before they are needed. */
8471 /* All sfunc calls are parallels with at least four components.
8472 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8473 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8474 && XVECLEN (PATTERN (insn), 0) >= 4
8475 && (reg = sfunc_uses_reg (insn)))
8477 if (! reg_set_p (reg, dep_insn))
8480 /* When the preceding instruction loads the shift amount of
8481 the following SHAD/SHLD, the latency of the load is increased
8484 && get_attr_type (insn) == TYPE_DYN_SHIFT
8485 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8486 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8487 XEXP (SET_SRC (single_set (insn)),
8490 /* When an LS group instruction with a latency of less than
8491 3 cycles is followed by a double-precision floating-point
8492 instruction, FIPR, or FTRV, the latency of the first
8493 instruction is increased to 3 cycles. */
8495 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8496 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8498 /* The lsw register of a double-precision computation is ready one
8500 else if (reload_completed
8501 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8502 && (use_pat = single_set (insn))
8503 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8507 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8508 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8511 /* An anti-dependence penalty of two applies if the first insn is a double
8512 precision fadd / fsub / fmul. */
8513 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8514 && recog_memoized (dep_insn) >= 0
8515 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8516 /* A lot of alleged anti-flow dependences are fake,
8517 so check this one is real. */
8518 && flow_dependent_p (dep_insn, insn))
8525 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8526 if DEP_INSN is anti-flow dependent on INSN. */
8528 flow_dependent_p (rtx insn, rtx dep_insn)
8530 rtx tmp = PATTERN (insn);
8532 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8533 return tmp == NULL_RTX;
8536 /* A helper function for flow_dependent_p called through note_stores. */
8538 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8540 rtx * pinsn = (rtx *) data;
8542 if (*pinsn && reg_referenced_p (x, *pinsn))
8546 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8547 'special function' patterns (type sfunc) that clobber pr, but that
8548 do not look like function calls to leaf_function_p. Hence we must
8549 do this extra check. */
8553 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8556 /* Return where to allocate pseudo for a given hard register initial
8559 sh_allocate_initial_value (rtx hard_reg)
8563 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8565 if (current_function_is_leaf
8566 && ! sh_pr_n_sets ()
8567 && ! (TARGET_SHCOMPACT
8568 && ((current_function_args_info.call_cookie
8569 & ~ CALL_COOKIE_RET_TRAMP (1))
8570 || current_function_has_nonlocal_label)))
8573 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8581 /* This function returns "2" to indicate dual issue for the SH4
8582 processor. To be used by the DFA pipeline description. */
8584 sh_issue_rate (void)
8586 if (TARGET_SUPERSCALAR)
8592 /* Functions for ready queue reordering for sched1. */
8594 /* Get weight for mode for a set x. */
8596 find_set_regmode_weight (rtx x, enum machine_mode mode)
8598 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8600 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8602 if (GET_CODE (SET_DEST (x)) == REG)
8604 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8614 /* Get regmode weight for insn. */
8616 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8618 short reg_weight = 0;
8621 /* Increment weight for each register born here. */
8623 reg_weight += find_set_regmode_weight (x, mode);
8624 if (GET_CODE (x) == PARALLEL)
8627 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8629 x = XVECEXP (PATTERN (insn), 0, j);
8630 reg_weight += find_set_regmode_weight (x, mode);
8633 /* Decrement weight for each register that dies here. */
8634 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8636 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8638 rtx note = XEXP (x, 0);
8639 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8646 /* Calculate regmode weights for all insns of a basic block. */
8648 find_regmode_weight (int b, enum machine_mode mode)
8650 rtx insn, next_tail, head, tail;
8652 get_block_head_tail (b, &head, &tail);
8653 next_tail = NEXT_INSN (tail);
8655 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8657 /* Handle register life information. */
8662 INSN_REGMODE_WEIGHT (insn, mode) =
8663 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8664 else if (mode == SImode)
8665 INSN_REGMODE_WEIGHT (insn, mode) =
8666 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8670 /* Comparison function for ready queue sorting. */
8672 rank_for_reorder (const void *x, const void *y)
8674 rtx tmp = *(const rtx *) y;
8675 rtx tmp2 = *(const rtx *) x;
8677 /* The insn in a schedule group should be issued the first. */
8678 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8679 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8681 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8682 minimizes instruction movement, thus minimizing sched's effect on
8683 register pressure. */
8684 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8687 /* Resort the array A in which only element at index N may be out of order. */
8689 swap_reorder (rtx *a, int n)
8691 rtx insn = a[n - 1];
8694 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8702 #define SCHED_REORDER(READY, N_READY) \
8705 if ((N_READY) == 2) \
8706 swap_reorder (READY, N_READY); \
8707 else if ((N_READY) > 2) \
8708 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8712 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8715 ready_reorder (rtx *ready, int nready)
8717 SCHED_REORDER (ready, nready);
8720 /* Calculate regmode weights for all insns of all basic block. */
8722 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8723 int verbose ATTRIBUTE_UNUSED,
8728 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8729 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8731 FOR_EACH_BB_REVERSE (b)
8733 find_regmode_weight (b->index, SImode);
8734 find_regmode_weight (b->index, SFmode);
8737 CURR_REGMODE_PRESSURE (SImode) = 0;
8738 CURR_REGMODE_PRESSURE (SFmode) = 0;
8744 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8745 int verbose ATTRIBUTE_UNUSED)
8747 if (regmode_weight[0])
8749 free (regmode_weight[0]);
8750 regmode_weight[0] = NULL;
8752 if (regmode_weight[1])
8754 free (regmode_weight[1]);
8755 regmode_weight[1] = NULL;
8759 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8760 keep count of register pressures on SImode and SFmode. */
8762 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8763 int sched_verbose ATTRIBUTE_UNUSED,
8767 if (GET_CODE (PATTERN (insn)) != USE
8768 && GET_CODE (PATTERN (insn)) != CLOBBER)
8769 cached_can_issue_more = can_issue_more - 1;
8771 cached_can_issue_more = can_issue_more;
8773 if (reload_completed)
8774 return cached_can_issue_more;
8776 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8777 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8779 return cached_can_issue_more;
8783 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8784 int verbose ATTRIBUTE_UNUSED,
8785 int veclen ATTRIBUTE_UNUSED)
8787 CURR_REGMODE_PRESSURE (SImode) = 0;
8788 CURR_REGMODE_PRESSURE (SFmode) = 0;
8791 /* Some magic numbers. */
8792 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8793 functions that already have high pressure on r0. */
8794 #define R0_MAX_LIFE_REGIONS 2
8795 #define R0_MAX_LIVE_LENGTH 12
8796 /* Register Pressure thresholds for SImode and SFmode registers. */
8797 #define SIMODE_MAX_WEIGHT 5
8798 #define SFMODE_MAX_WEIGHT 10
8800 /* Return true if the pressure is high for MODE. */
8802 high_pressure (enum machine_mode mode)
8804 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8805 functions that already have high pressure on r0. */
8806 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8807 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8811 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8813 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8816 /* Reorder ready queue if register pressure is high. */
8818 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8819 int sched_verbose ATTRIBUTE_UNUSED,
8822 int clock_var ATTRIBUTE_UNUSED)
8824 if (reload_completed)
8825 return sh_issue_rate ();
8827 if (high_pressure (SFmode) || high_pressure (SImode))
8829 ready_reorder (ready, *n_readyp);
8832 return sh_issue_rate ();
8835 /* Skip cycles if the current register pressure is high. */
8837 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8838 int sched_verbose ATTRIBUTE_UNUSED,
8839 rtx *ready ATTRIBUTE_UNUSED,
8840 int *n_readyp ATTRIBUTE_UNUSED,
8841 int clock_var ATTRIBUTE_UNUSED)
8843 if (reload_completed)
8844 return cached_can_issue_more;
8846 if (high_pressure(SFmode) || high_pressure (SImode))
8849 return cached_can_issue_more;
8852 /* Skip cycles without sorting the ready queue. This will move insn from
8853 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8854 queue by sh_reorder. */
8856 /* Generally, skipping these many cycles are sufficient for all insns to move
8861 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8862 int sched_verbose ATTRIBUTE_UNUSED,
8863 rtx insn ATTRIBUTE_UNUSED,
8868 if (reload_completed)
8873 if ((clock_var - last_clock_var) < MAX_SKIPS)
8878 /* If this is the last cycle we are skipping, allow reordering of R. */
8879 if ((clock_var - last_clock_var) == MAX_SKIPS)
8891 /* SHmedia requires registers for branches, so we can't generate new
8892 branches past reload. */
8894 sh_cannot_modify_jumps_p (void)
8896 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8900 sh_target_reg_class (void)
8902 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8906 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8911 if (! shmedia_space_reserved_for_target_registers)
8913 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8915 if (calc_live_regs (&dummy) >= 6 * 8)
8917 /* This is a borderline case. See if we got a nested loop, or a loop
8918 with a call, or with more than 4 labels inside. */
8919 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8921 if (GET_CODE (insn) == NOTE
8922 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8928 insn = NEXT_INSN (insn);
8929 if ((GET_CODE (insn) == NOTE
8930 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8931 || GET_CODE (insn) == CALL_INSN
8932 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8935 while (GET_CODE (insn) != NOTE
8936 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8943 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8945 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8949 On the SH1..SH4, the trampoline looks like
8950 2 0002 D202 mov.l l2,r2
8951 1 0000 D301 mov.l l1,r3
8954 5 0008 00000000 l1: .long area
8955 6 000c 00000000 l2: .long function
8957 SH5 (compact) uses r1 instead of r3 for the static chain. */
8960 /* Emit RTL insns to initialize the variable parts of a trampoline.
8961 FNADDR is an RTX for the address of the function's pure code.
8962 CXT is an RTX for the static chain value for the function. */
8965 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8967 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8969 if (TARGET_SHMEDIA64)
8974 rtx movi1 = GEN_INT (0xcc000010);
8975 rtx shori1 = GEN_INT (0xc8000010);
8978 /* The following trampoline works within a +- 128 KB range for cxt:
8979 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8980 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8981 gettr tr1,r1; blink tr0,r63 */
8982 /* Address rounding makes it hard to compute the exact bounds of the
8983 offset for this trampoline, but we have a rather generous offset
8984 range, so frame_offset should do fine as an upper bound. */
8985 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8987 /* ??? could optimize this trampoline initialization
8988 by writing DImode words with two insns each. */
8989 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8990 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8991 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8992 insn = gen_rtx_AND (DImode, insn, mask);
8993 /* Or in ptb/u .,tr1 pattern */
8994 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8995 insn = force_operand (insn, NULL_RTX);
8996 insn = gen_lowpart (SImode, insn);
8997 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8998 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8999 insn = gen_rtx_AND (DImode, insn, mask);
9000 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9001 insn = gen_lowpart (SImode, insn);
9002 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9003 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9004 insn = gen_rtx_AND (DImode, insn, mask);
9005 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9006 insn = gen_lowpart (SImode, insn);
9007 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9008 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9009 insn = gen_rtx_AND (DImode, insn, mask);
9010 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9011 insn = gen_lowpart (SImode, insn);
9012 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9013 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9014 insn = gen_rtx_AND (DImode, insn, mask);
9015 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9016 insn = gen_lowpart (SImode, insn);
9017 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9018 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9019 GEN_INT (0x6bf10600));
9020 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9021 GEN_INT (0x4415fc10));
9022 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9023 GEN_INT (0x4401fff0));
9024 emit_insn (gen_ic_invalidate_line (tramp));
9027 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9028 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9030 tramp_templ = gen_datalabel_ref (tramp_templ);
9032 src = gen_const_mem (BLKmode, tramp_templ);
9033 set_mem_align (dst, 256);
9034 set_mem_align (src, 64);
9035 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9037 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9038 emit_move_insn (adjust_address (tramp_mem, Pmode,
9039 fixed_len + GET_MODE_SIZE (Pmode)),
9041 emit_insn (gen_ic_invalidate_line (tramp));
9044 else if (TARGET_SHMEDIA)
9046 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9047 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9048 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9049 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9050 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9051 rotated 10 right, and higher 16 bit of every 32 selected. */
9053 = force_reg (V2HImode, (simplify_gen_subreg
9054 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9055 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9056 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9058 tramp = force_reg (Pmode, tramp);
9059 fnaddr = force_reg (SImode, fnaddr);
9060 cxt = force_reg (SImode, cxt);
9061 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9062 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9064 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9065 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9066 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9067 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9068 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9069 gen_rtx_SUBREG (V2HImode, cxt, 0),
9071 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9072 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9073 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9074 if (TARGET_LITTLE_ENDIAN)
9076 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9077 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9081 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9082 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9084 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9085 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9086 emit_insn (gen_ic_invalidate_line (tramp));
9089 else if (TARGET_SHCOMPACT)
9091 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9094 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9095 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9097 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9098 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9100 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9101 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9104 if (TARGET_USERMODE)
9105 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9107 0, VOIDmode, 1, tramp, SImode);
9109 emit_insn (gen_ic_invalidate_line (tramp));
9113 /* FIXME: This is overly conservative. A SHcompact function that
9114 receives arguments ``by reference'' will have them stored in its
9115 own stack frame, so it must not pass pointers or references to
9116 these arguments to other functions by means of sibling calls. */
9117 /* If PIC, we cannot make sibling calls to global functions
9118 because the PLT requires r12 to be live. */
9120 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9123 && (! TARGET_SHCOMPACT
9124 || current_function_args_info.stack_regs == 0)
9125 && ! sh_cfun_interrupt_handler_p ()
9127 || (decl && ! TREE_PUBLIC (decl))
9128 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9131 /* Machine specific built-in functions. */
9133 struct builtin_description
9135 const enum insn_code icode;
9136 const char *const name;
9140 /* describe number and signedness of arguments; arg[0] == result
9141 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9142 /* 9: 64 bit pointer, 10: 32 bit pointer */
9143 static const char signature_args[][4] =
9145 #define SH_BLTIN_V2SI2 0
9147 #define SH_BLTIN_V4HI2 1
9149 #define SH_BLTIN_V2SI3 2
9151 #define SH_BLTIN_V4HI3 3
9153 #define SH_BLTIN_V8QI3 4
9155 #define SH_BLTIN_MAC_HISI 5
9157 #define SH_BLTIN_SH_HI 6
9159 #define SH_BLTIN_SH_SI 7
9161 #define SH_BLTIN_V4HI2V2SI 8
9163 #define SH_BLTIN_V4HI2V8QI 9
9165 #define SH_BLTIN_SISF 10
9167 #define SH_BLTIN_LDUA_L 11
9169 #define SH_BLTIN_LDUA_Q 12
9171 #define SH_BLTIN_STUA_L 13
9173 #define SH_BLTIN_STUA_Q 14
9175 #define SH_BLTIN_LDUA_L64 15
9177 #define SH_BLTIN_LDUA_Q64 16
9179 #define SH_BLTIN_STUA_L64 17
9181 #define SH_BLTIN_STUA_Q64 18
9183 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9184 #define SH_BLTIN_2 19
9185 #define SH_BLTIN_SU 19
9187 #define SH_BLTIN_3 20
9188 #define SH_BLTIN_SUS 20
9190 #define SH_BLTIN_PSSV 21
9192 #define SH_BLTIN_XXUU 22
9193 #define SH_BLTIN_UUUU 22
9195 #define SH_BLTIN_PV 23
9198 /* mcmv: operands considered unsigned. */
9199 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9200 /* mperm: control value considered unsigned int. */
9201 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9202 /* mshards_q: returns signed short. */
9203 /* nsb: takes long long arg, returns unsigned char. */
9204 static const struct builtin_description bdesc[] =
9206 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9207 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9208 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9209 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9210 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9211 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9212 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9214 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9215 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9216 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9217 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9218 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9219 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9220 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9221 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9222 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9223 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9224 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9225 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9226 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9227 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9228 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9229 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9230 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9231 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9232 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9233 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9234 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9235 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9236 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9237 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9238 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9239 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9240 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9241 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9242 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9243 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9244 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9245 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9246 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9247 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9248 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9249 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9250 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9251 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9252 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9253 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9254 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9255 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9256 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9257 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9258 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9259 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9260 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9261 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9262 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9263 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9264 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9265 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9266 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9267 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9268 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9269 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9270 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9271 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9272 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9273 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9274 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9275 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9276 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9277 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9278 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9279 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9280 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9281 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9282 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9283 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9284 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9285 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9286 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9287 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9288 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9289 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9293 sh_media_init_builtins (void)
9295 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9296 const struct builtin_description *d;
9298 memset (shared, 0, sizeof shared);
9299 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9301 tree type, arg_type = 0;
9302 int signature = d->signature;
9305 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9306 type = shared[signature];
9309 int has_result = signature_args[signature][0] != 0;
9311 if ((signature_args[signature][1] & 8)
9312 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9313 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9315 if (! TARGET_FPU_ANY
9316 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9318 type = void_list_node;
9321 int arg = signature_args[signature][i];
9322 int opno = i - 1 + has_result;
9325 arg_type = ptr_type_node;
9327 arg_type = (*lang_hooks.types.type_for_mode)
9328 (insn_data[d->icode].operand[opno].mode,
9333 arg_type = void_type_node;
9336 type = tree_cons (NULL_TREE, arg_type, type);
9338 type = build_function_type (arg_type, type);
9339 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9340 shared[signature] = type;
9342 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9347 /* Implements target hook vector_mode_supported_p. */
9349 sh_vector_mode_supported_p (enum machine_mode mode)
9352 && ((mode == V2SFmode)
9353 || (mode == V4SFmode)
9354 || (mode == V16SFmode)))
9357 else if (TARGET_SHMEDIA
9358 && ((mode == V8QImode)
9359 || (mode == V2HImode)
9360 || (mode == V4HImode)
9361 || (mode == V2SImode)))
9367 /* Implements target hook dwarf_calling_convention. Return an enum
9368 of dwarf_calling_convention. */
9370 sh_dwarf_calling_convention (tree func)
9372 if (sh_attr_renesas_p (func))
9373 return DW_CC_GNU_renesas_sh;
9375 return DW_CC_normal;
9379 sh_init_builtins (void)
9382 sh_media_init_builtins ();
9385 /* Expand an expression EXP that calls a built-in function,
9386 with result going to TARGET if that's convenient
9387 (and in mode MODE if that's convenient).
9388 SUBTARGET may be used as the target for computing one of EXP's operands.
9389 IGNORE is nonzero if the value is to be ignored. */
9392 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9393 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9395 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9396 tree arglist = TREE_OPERAND (exp, 1);
9397 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9398 const struct builtin_description *d = &bdesc[fcode];
9399 enum insn_code icode = d->icode;
9400 int signature = d->signature;
9401 enum machine_mode tmode = VOIDmode;
9406 if (signature_args[signature][0])
9411 tmode = insn_data[icode].operand[0].mode;
9413 || GET_MODE (target) != tmode
9414 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9415 target = gen_reg_rtx (tmode);
9421 for (i = 1; i <= 3; i++, nop++)
9424 enum machine_mode opmode, argmode;
9427 if (! signature_args[signature][i])
9429 arg = TREE_VALUE (arglist);
9430 if (arg == error_mark_node)
9432 arglist = TREE_CHAIN (arglist);
9433 if (signature_args[signature][i] & 8)
9436 optype = ptr_type_node;
9440 opmode = insn_data[icode].operand[nop].mode;
9441 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9443 argmode = TYPE_MODE (TREE_TYPE (arg));
9444 if (argmode != opmode)
9445 arg = build1 (NOP_EXPR, optype, arg);
9446 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9447 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9448 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9454 pat = (*insn_data[d->icode].genfun) (op[0]);
9457 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9460 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9463 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9475 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9477 rtx sel0 = const0_rtx;
9478 rtx sel1 = const1_rtx;
9479 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9480 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9482 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9483 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9487 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9489 rtx sel0 = const0_rtx;
9490 rtx sel1 = const1_rtx;
9491 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9493 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9495 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9496 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9499 /* Return the class of registers for which a mode change from FROM to TO
9502 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9503 enum reg_class class)
9505 /* We want to enable the use of SUBREGs as a means to
9506 VEC_SELECT a single element of a vector. */
9507 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9508 return (reg_classes_intersect_p (GENERAL_REGS, class));
9510 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9512 if (TARGET_LITTLE_ENDIAN)
9514 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9515 return reg_classes_intersect_p (DF_REGS, class);
9519 if (GET_MODE_SIZE (from) < 8)
9520 return reg_classes_intersect_p (DF_HI_REGS, class);
9527 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9528 that label is used. */
9531 sh_mark_label (rtx address, int nuses)
9533 if (GOTOFF_P (address))
9535 /* Extract the label or symbol. */
9536 address = XEXP (address, 0);
9537 if (GET_CODE (address) == PLUS)
9538 address = XEXP (address, 0);
9539 address = XVECEXP (address, 0, 0);
9541 if (GET_CODE (address) == LABEL_REF
9542 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9543 LABEL_NUSES (XEXP (address, 0)) += nuses;
9546 /* Compute extra cost of moving data between one register class
9549 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9550 uses this information. Hence, the general register <-> floating point
9551 register information here is not used for SFmode. */
9554 sh_register_move_cost (enum machine_mode mode,
9555 enum reg_class srcclass, enum reg_class dstclass)
9557 if (dstclass == T_REGS || dstclass == PR_REGS)
9560 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9563 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9564 && REGCLASS_HAS_FP_REG (srcclass)
9565 && REGCLASS_HAS_FP_REG (dstclass))
9568 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9569 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9571 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9572 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9575 if ((REGCLASS_HAS_FP_REG (dstclass)
9576 && REGCLASS_HAS_GENERAL_REG (srcclass))
9577 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9578 && REGCLASS_HAS_FP_REG (srcclass)))
9579 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9580 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9582 if ((dstclass == FPUL_REGS
9583 && REGCLASS_HAS_GENERAL_REG (srcclass))
9584 || (srcclass == FPUL_REGS
9585 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9588 if ((dstclass == FPUL_REGS
9589 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9590 || (srcclass == FPUL_REGS
9591 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9594 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9595 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9598 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9600 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9602 if (sh_gettrcost >= 0)
9603 return sh_gettrcost;
9604 else if (!TARGET_PT_FIXED)
9608 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9609 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9614 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9615 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9616 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9618 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9621 static rtx emit_load_ptr (rtx, rtx);
9624 emit_load_ptr (rtx reg, rtx addr)
9626 rtx mem = gen_const_mem (ptr_mode, addr);
9628 if (Pmode != ptr_mode)
9629 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9630 return emit_move_insn (reg, mem);
9634 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9635 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9638 CUMULATIVE_ARGS cum;
9639 int structure_value_byref = 0;
9640 rtx this, this_value, sibcall, insns, funexp;
9641 tree funtype = TREE_TYPE (function);
9642 int simple_add = CONST_OK_FOR_ADD (delta);
9644 rtx scratch0, scratch1, scratch2;
9647 reload_completed = 1;
9648 epilogue_completed = 1;
9650 current_function_uses_only_leaf_regs = 1;
9651 reset_block_changes ();
9653 emit_note (NOTE_INSN_PROLOGUE_END);
9655 /* Find the "this" pointer. We have such a wide range of ABIs for the
9656 SH that it's best to do this completely machine independently.
9657 "this" is passed as first argument, unless a structure return pointer
9658 comes first, in which case "this" comes second. */
9659 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9660 #ifndef PCC_STATIC_STRUCT_RETURN
9661 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9662 structure_value_byref = 1;
9663 #endif /* not PCC_STATIC_STRUCT_RETURN */
9664 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9666 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9668 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9670 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9672 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9673 static chain pointer (even if you can't have nested virtual functions
9674 right now, someone might implement them sometime), and the rest of the
9675 registers are used for argument passing, are callee-saved, or reserved. */
9676 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
9677 -ffixed-reg has been used. */
9678 if (! call_used_regs[0] || fixed_regs[0])
9679 error ("r0 needs to be available as a call-clobbered register");
9680 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9683 if (call_used_regs[1] && ! fixed_regs[1])
9684 scratch1 = gen_rtx_REG (ptr_mode, 1);
9685 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9686 pointing where to return struct values. */
9687 if (call_used_regs[3] && ! fixed_regs[3])
9688 scratch2 = gen_rtx_REG (Pmode, 3);
9690 else if (TARGET_SHMEDIA)
9692 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9693 if (i != REGNO (scratch0) &&
9694 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9696 scratch1 = gen_rtx_REG (ptr_mode, i);
9699 if (scratch1 == scratch0)
9700 error ("Need a second call-clobbered general purpose register");
9701 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9702 if (call_used_regs[i] && ! fixed_regs[i])
9704 scratch2 = gen_rtx_REG (Pmode, i);
9707 if (scratch2 == scratch0)
9708 error ("Need a call-clobbered target register");
9711 this_value = plus_constant (this, delta);
9713 && (simple_add || scratch0 != scratch1)
9714 && strict_memory_address_p (ptr_mode, this_value))
9716 emit_load_ptr (scratch0, this_value);
9722 else if (simple_add)
9723 emit_move_insn (this, this_value);
9726 emit_move_insn (scratch1, GEN_INT (delta));
9727 emit_insn (gen_add2_insn (this, scratch1));
9735 emit_load_ptr (scratch0, this);
9737 offset_addr = plus_constant (scratch0, vcall_offset);
9738 if (strict_memory_address_p (ptr_mode, offset_addr))
9740 else if (! TARGET_SH5 && scratch0 != scratch1)
9742 /* scratch0 != scratch1, and we have indexed loads. Get better
9743 schedule by loading the offset into r1 and using an indexed
9744 load - then the load of r1 can issue before the load from
9745 (this + delta) finishes. */
9746 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9747 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9749 else if (CONST_OK_FOR_ADD (vcall_offset))
9751 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9752 offset_addr = scratch0;
9754 else if (scratch0 != scratch1)
9756 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9757 emit_insn (gen_add2_insn (scratch0, scratch1));
9758 offset_addr = scratch0;
9761 gcc_unreachable (); /* FIXME */
9762 emit_load_ptr (scratch0, offset_addr);
9764 if (Pmode != ptr_mode)
9765 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9766 emit_insn (gen_add2_insn (this, scratch0));
9769 /* Generate a tail call to the target function. */
9770 if (! TREE_USED (function))
9772 assemble_external (function);
9773 TREE_USED (function) = 1;
9775 funexp = XEXP (DECL_RTL (function), 0);
9776 /* If the function is overridden, so is the thunk, hence we don't
9777 need GOT addressing even if this is a public symbol. */
9779 if (TARGET_SH1 && ! flag_weak)
9780 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9783 if (TARGET_SH2 && flag_pic)
9785 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9786 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9790 if (TARGET_SHMEDIA && flag_pic)
9792 funexp = gen_sym2PIC (funexp);
9793 PUT_MODE (funexp, Pmode);
9795 emit_move_insn (scratch2, funexp);
9796 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9797 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9799 sibcall = emit_call_insn (sibcall);
9800 SIBLING_CALL_P (sibcall) = 1;
9801 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9804 /* Run just enough of rest_of_compilation to do scheduling and get
9805 the insns emitted. Note that use_thunk calls
9806 assemble_start_function and assemble_end_function. */
9808 insn_locators_initialize ();
9809 insns = get_insns ();
9813 /* Initialize the bitmap obstacks. */
9814 bitmap_obstack_initialize (NULL);
9815 bitmap_obstack_initialize (®_obstack);
9818 rtl_register_cfg_hooks ();
9819 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9820 init_rtl_bb_info (EXIT_BLOCK_PTR);
9821 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9822 EXIT_BLOCK_PTR->flags |= BB_RTL;
9823 find_basic_blocks (insns);
9825 if (flag_schedule_insns_after_reload)
9827 life_analysis (PROP_FINAL);
9829 split_all_insns (1);
9833 /* We must split jmp insn in PIC case. */
9835 split_all_insns_noflow ();
9840 if (optimize > 0 && flag_delayed_branch)
9841 dbr_schedule (insns);
9843 shorten_branches (insns);
9844 final_start_function (insns, file, 1);
9845 final (insns, file, 1);
9846 final_end_function ();
9850 /* Release all memory allocated by flow. */
9851 free_basic_block_vars ();
9853 /* Release the bitmap obstacks. */
9854 bitmap_obstack_release (®_obstack);
9855 bitmap_obstack_release (NULL);
9858 reload_completed = 0;
9859 epilogue_completed = 0;
9864 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9868 /* If this is not an ordinary function, the name usually comes from a
9869 string literal or an sprintf buffer. Make sure we use the same
9870 string consistently, so that cse will be able to unify address loads. */
9871 if (kind != FUNCTION_ORDINARY)
9872 name = IDENTIFIER_POINTER (get_identifier (name));
9873 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9874 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9878 case FUNCTION_ORDINARY:
9882 rtx reg = target ? target : gen_reg_rtx (Pmode);
9884 emit_insn (gen_symGOT2reg (reg, sym));
9890 /* ??? To allow cse to work, we use GOTOFF relocations.
9891 we could add combiner patterns to transform this into
9892 straight pc-relative calls with sym2PIC / bsrf when
9893 label load and function call are still 1:1 and in the
9894 same basic block during combine. */
9895 rtx reg = target ? target : gen_reg_rtx (Pmode);
9897 emit_insn (gen_symGOTOFF2reg (reg, sym));
9902 if (target && sym != target)
9904 emit_move_insn (target, sym);
9910 /* Find the number of a general purpose register in S. */
9912 scavenge_reg (HARD_REG_SET *s)
9915 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9916 if (TEST_HARD_REG_BIT (*s, r))
9922 sh_get_pr_initial_val (void)
9926 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9927 PR register on SHcompact, because it might be clobbered by the prologue.
9928 We check first if that is known to be the case. */
9929 if (TARGET_SHCOMPACT
9930 && ((current_function_args_info.call_cookie
9931 & ~ CALL_COOKIE_RET_TRAMP (1))
9932 || current_function_has_nonlocal_label))
9933 return gen_frame_mem (SImode, return_address_pointer_rtx);
9935 /* If we haven't finished rtl generation, there might be a nonlocal label
9936 that we haven't seen yet.
9937 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9938 is set, unless it has been called before for the same register. And even
9939 then, we end in trouble if we didn't use the register in the same
9940 basic block before. So call get_hard_reg_initial_val now and wrap it
9941 in an unspec if we might need to replace it. */
9942 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9943 combine can put the pseudo returned by get_hard_reg_initial_val into
9944 instructions that need a general purpose registers, which will fail to
9945 be recognized when the pseudo becomes allocated to PR. */
9947 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9949 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9954 sh_expand_t_scc (enum rtx_code code, rtx target)
9956 rtx result = target;
9959 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9960 || GET_CODE (sh_compare_op1) != CONST_INT)
9962 if (GET_CODE (result) != REG)
9963 result = gen_reg_rtx (SImode);
9964 val = INTVAL (sh_compare_op1);
9965 if ((code == EQ && val == 1) || (code == NE && val == 0))
9966 emit_insn (gen_movt (result));
9967 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9969 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9970 emit_insn (gen_subc (result, result, result));
9971 emit_insn (gen_addsi3 (result, result, const1_rtx));
9973 else if (code == EQ || code == NE)
9974 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9977 if (result != target)
9978 emit_move_insn (target, result);
9982 /* INSN is an sfunc; return the rtx that describes the address used. */
9984 extract_sfunc_addr (rtx insn)
9986 rtx pattern, part = NULL_RTX;
9989 pattern = PATTERN (insn);
9990 len = XVECLEN (pattern, 0);
9991 for (i = 0; i < len; i++)
9993 part = XVECEXP (pattern, 0, i);
9994 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9995 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9996 return XEXP (part, 0);
9998 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9999 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10002 /* Verify that the register in use_sfunc_addr still agrees with the address
10003 used in the sfunc. This prevents fill_slots_from_thread from changing
10005 INSN is the use_sfunc_addr instruction, and REG is the register it
10008 check_use_sfunc_addr (rtx insn, rtx reg)
10010 /* Search for the sfunc. It should really come right after INSN. */
10011 while ((insn = NEXT_INSN (insn)))
10013 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10015 if (! INSN_P (insn))
10018 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10019 insn = XVECEXP (PATTERN (insn), 0, 0);
10020 if (GET_CODE (PATTERN (insn)) != PARALLEL
10021 || get_attr_type (insn) != TYPE_SFUNC)
10023 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10025 gcc_unreachable ();
10028 /* This function returns a constant rtx that represents pi / 2**15 in
10029 SFmode. it's used to scale SFmode angles, in radians, to a
10030 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10031 maps to 0x10000). */
10033 static GTY(()) rtx sh_fsca_sf2int_rtx;
10036 sh_fsca_sf2int (void)
10038 if (! sh_fsca_sf2int_rtx)
10040 REAL_VALUE_TYPE rv;
10042 real_from_string (&rv, "10430.378350470453");
10043 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10046 return sh_fsca_sf2int_rtx;
10049 /* This function returns a constant rtx that represents pi / 2**15 in
10050 DFmode. it's used to scale DFmode angles, in radians, to a
10051 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10052 maps to 0x10000). */
10054 static GTY(()) rtx sh_fsca_df2int_rtx;
10057 sh_fsca_df2int (void)
10059 if (! sh_fsca_df2int_rtx)
10061 REAL_VALUE_TYPE rv;
10063 real_from_string (&rv, "10430.378350470453");
10064 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10067 return sh_fsca_df2int_rtx;
10070 /* This function returns a constant rtx that represents 2**15 / pi in
10071 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10072 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10075 static GTY(()) rtx sh_fsca_int2sf_rtx;
10078 sh_fsca_int2sf (void)
10080 if (! sh_fsca_int2sf_rtx)
10082 REAL_VALUE_TYPE rv;
10084 real_from_string (&rv, "9.587379924285257e-5");
10085 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10088 return sh_fsca_int2sf_rtx;
10091 /* Initialize the CUMULATIVE_ARGS structure. */
10094 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10096 rtx libname ATTRIBUTE_UNUSED,
10098 signed int n_named_args,
10099 enum machine_mode mode)
10101 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10102 pcum->free_single_fp_reg = 0;
10103 pcum->stack_regs = 0;
10104 pcum->byref_regs = 0;
10106 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10108 /* XXX - Should we check TARGET_HITACHI here ??? */
10109 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10113 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10114 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10115 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10116 pcum->arg_count [(int) SH_ARG_INT]
10117 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10120 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10121 && pcum->arg_count [(int) SH_ARG_INT] == 0
10122 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10123 ? int_size_in_bytes (TREE_TYPE (fntype))
10124 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10125 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10126 == FIRST_RET_REG));
10130 pcum->arg_count [(int) SH_ARG_INT] = 0;
10131 pcum->prototype_p = FALSE;
10132 if (mode != VOIDmode)
10134 pcum->call_cookie =
10135 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10136 && GET_MODE_SIZE (mode) > 4
10137 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10139 /* If the default ABI is the Renesas ABI then all library
10140 calls must assume that the library will be using the
10141 Renesas ABI. So if the function would return its result
10142 in memory then we must force the address of this memory
10143 block onto the stack. Ideally we would like to call
10144 targetm.calls.return_in_memory() here but we do not have
10145 the TYPE or the FNDECL available so we synthesize the
10146 contents of that function as best we can. */
10148 (TARGET_DEFAULT & MASK_HITACHI)
10149 && (mode == BLKmode
10150 || (GET_MODE_SIZE (mode) > 4
10151 && !(mode == DFmode
10152 && TARGET_FPU_DOUBLE)));
10156 pcum->call_cookie = 0;
10157 pcum->force_mem = FALSE;
10162 /* Determine if two hard register sets intersect.
10163 Return 1 if they do. */
10166 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10169 COPY_HARD_REG_SET (c, *a);
10170 AND_HARD_REG_SET (c, *b);
10171 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10177 #ifdef TARGET_ADJUST_UNROLL_MAX
10179 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10180 int max_unrolled_insns, int strength_reduce_p,
10183 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10184 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10186 /* Throttle back loop unrolling so that the costs of using more
10187 targets than the eight target register we have don't outweigh
10188 the benefits of unrolling. */
10190 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10191 int n_barriers = 0;
10196 int unroll_benefit = 0, mem_latency = 0;
10197 int base_cost, best_cost, cost;
10198 int factor, best_factor;
10200 unsigned max_iterations = 32767;
10202 int need_precond = 0, precond = 0;
10203 basic_block * bbs = get_loop_body (loop);
10204 struct niter_desc *desc;
10206 /* Assume that all labels inside the loop are used from inside the
10207 loop. If the loop has multiple entry points, it is unlikely to
10208 be unrolled anyways.
10209 Also assume that all calls are to different functions. That is
10210 somewhat pessimistic, but if you have lots of calls, unrolling the
10211 loop is not likely to gain you much in the first place. */
10212 i = loop->num_nodes - 1;
10213 for (insn = BB_HEAD (bbs[i]); ; )
10215 if (GET_CODE (insn) == CODE_LABEL)
10217 else if (GET_CODE (insn) == CALL_INSN)
10219 else if (GET_CODE (insn) == NOTE
10220 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10222 else if (GET_CODE (insn) == BARRIER)
10224 if (insn != BB_END (bbs[i]))
10225 insn = NEXT_INSN (insn);
10227 insn = BB_HEAD (bbs[i]);
10232 /* One label for the loop top is normal, and it won't be duplicated by
10235 return max_unrolled_insns;
10236 if (n_inner_loops > 0)
10238 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10239 dest = LABEL_NEXTREF (dest))
10241 for (i = n_exit_dest - 1;
10242 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10244 exit_dest[n_exit_dest++] = dest;
10246 /* If the loop top and call and exit destinations are enough to fill up
10247 the target registers, we're unlikely to do any more damage by
10249 if (n_calls + n_exit_dest >= 7)
10250 return max_unrolled_insns;
10252 /* ??? In the new loop unroller, there is no longer any strength
10253 reduction information available. Thus, when it comes to unrolling,
10254 we know the cost of everything, but we know the value of nothing. */
10256 if (strength_reduce_p
10257 && (unroll_type == LPT_UNROLL_RUNTIME
10258 || unroll_type == LPT_UNROLL_CONSTANT
10259 || unroll_type == LPT_PEEL_COMPLETELY))
10261 struct loop_ivs *ivs = LOOP_IVS (loop);
10262 struct iv_class *bl;
10264 /* We'll save one compare-and-branch in each loop body copy
10265 but the last one. */
10266 unroll_benefit = 1;
10267 /* Assess the benefit of removing biv & giv updates. */
10268 for (bl = ivs->list; bl; bl = bl->next)
10270 rtx increment = biv_total_increment (bl);
10271 struct induction *v;
10273 if (increment && GET_CODE (increment) == CONST_INT)
10276 for (v = bl->giv; v; v = v->next_iv)
10278 if (! v->ignore && v->same == 0
10279 && GET_CODE (v->mult_val) == CONST_INT)
10281 /* If this giv uses an array, try to determine
10282 a maximum iteration count from the size of the
10283 array. This need not be correct all the time,
10284 but should not be too far off the mark too often. */
10285 while (v->giv_type == DEST_ADDR)
10287 rtx mem = PATTERN (v->insn);
10288 tree mem_expr, type, size_tree;
10290 if (GET_CODE (SET_SRC (mem)) == MEM)
10291 mem = SET_SRC (mem);
10292 else if (GET_CODE (SET_DEST (mem)) == MEM)
10293 mem = SET_DEST (mem);
10296 mem_expr = MEM_EXPR (mem);
10299 type = TREE_TYPE (mem_expr);
10300 if (TREE_CODE (type) != ARRAY_TYPE
10301 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10303 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10306 TYPE_SIZE_UNIT (type));
10307 if (TREE_CODE (size_tree) == INTEGER_CST
10308 && ! TREE_INT_CST_HIGH (size_tree)
10309 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10310 max_iterations = TREE_INT_CST_LOW (size_tree);
10318 /* Assume there is at least some benefit. */
10319 unroll_benefit = 1;
10322 desc = get_simple_loop_desc (loop);
10323 n_iterations = desc->const_iter ? desc->niter : 0;
10325 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10327 if (! strength_reduce_p || ! n_iterations)
10329 if (! n_iterations)
10332 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10333 if (! n_iterations)
10336 #if 0 /* ??? See above - missing induction variable information. */
10337 while (unroll_benefit > 1) /* no loop */
10339 /* We include the benefit of biv/ giv updates. Check if some or
10340 all of these updates are likely to fit into a scheduling
10342 We check for the following case:
10343 - All the insns leading to the first JUMP_INSN are in a strict
10345 - there is at least one memory reference in them.
10347 When we find such a pattern, we assume that we can hide as many
10348 updates as the total of the load latency is, if we have an
10349 unroll factor of at least two. We might or might not also do
10350 this without unrolling, so rather than considering this as an
10351 extra unroll benefit, discount it in the unroll benefits of unroll
10352 factors higher than two. */
10356 insn = next_active_insn (loop->start);
10357 last_set = single_set (insn);
10360 if (GET_CODE (SET_SRC (last_set)) == MEM)
10362 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10364 if (! INSN_P (insn))
10366 if (GET_CODE (insn) == JUMP_INSN)
10368 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10370 /* Check if this is a to-be-reduced giv insn. */
10371 struct loop_ivs *ivs = LOOP_IVS (loop);
10372 struct iv_class *bl;
10373 struct induction *v;
10374 for (bl = ivs->list; bl; bl = bl->next)
10376 if (bl->biv->insn == insn)
10378 for (v = bl->giv; v; v = v->next_iv)
10379 if (v->insn == insn)
10387 set = single_set (insn);
10390 if (GET_CODE (SET_SRC (set)) == MEM)
10394 if (mem_latency < 0)
10396 else if (mem_latency > unroll_benefit - 1)
10397 mem_latency = unroll_benefit - 1;
10401 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10403 return max_unrolled_insns;
10405 n_dest = n_labels + n_calls + n_exit_dest;
10406 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10409 if (n_barriers * 2 > n_labels - 1)
10410 n_barriers = (n_labels - 1) / 2;
10411 for (factor = 2; factor <= 8; factor++)
10413 /* Bump up preconditioning cost for each power of two. */
10414 if (! (factor & (factor-1)))
10416 /* When preconditioning, only powers of two will be considered. */
10417 else if (need_precond)
10419 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10420 + (n_labels - 1) * factor + n_calls + n_exit_dest
10421 - (n_barriers * factor >> 1)
10424 = ((n_dest <= 8 ? 0 : n_dest - 7)
10425 - base_cost * factor
10426 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10427 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10428 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10431 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10432 if (cost < best_cost)
10435 best_factor = factor;
10438 threshold = best_factor * insn_count;
10439 if (max_unrolled_insns > threshold)
10440 max_unrolled_insns = threshold;
10442 return max_unrolled_insns;
10444 #endif /* TARGET_ADJUST_UNROLL_MAX */
10446 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10447 not enter into CONST_DOUBLE for the replace.
10449 Note that copying is not done so X must not be shared unless all copies
10450 are to be modified.
10452 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10453 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10454 replacements[n*2+1] - and that we take mode changes into account.
10456 If a replacement is ambiguous, return NULL_RTX.
10458 If MODIFY is zero, don't modify any rtl in place,
10459 just return zero or nonzero for failure / success. */
10462 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10467 /* The following prevents loops occurrence when we change MEM in
10468 CONST_DOUBLE onto the same CONST_DOUBLE. */
10469 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10472 for (i = n_replacements - 1; i >= 0 ; i--)
10473 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10474 return replacements[i*2+1];
10476 /* Allow this function to make replacements in EXPR_LISTs. */
10480 if (GET_CODE (x) == SUBREG)
10482 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10483 n_replacements, modify);
10485 if (GET_CODE (new) == CONST_INT)
10487 x = simplify_subreg (GET_MODE (x), new,
10488 GET_MODE (SUBREG_REG (x)),
10494 SUBREG_REG (x) = new;
10498 else if (GET_CODE (x) == REG)
10500 unsigned regno = REGNO (x);
10501 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10502 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10503 rtx result = NULL_RTX;
10505 for (i = n_replacements - 1; i >= 0; i--)
10507 rtx from = replacements[i*2];
10508 rtx to = replacements[i*2+1];
10509 unsigned from_regno, from_nregs, to_regno, new_regno;
10511 if (GET_CODE (from) != REG)
10513 from_regno = REGNO (from);
10514 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10515 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10516 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10518 if (regno < from_regno
10519 || regno + nregs > from_regno + nregs
10520 || GET_CODE (to) != REG
10523 to_regno = REGNO (to);
10524 if (to_regno < FIRST_PSEUDO_REGISTER)
10526 new_regno = regno + to_regno - from_regno;
10527 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10530 result = gen_rtx_REG (GET_MODE (x), new_regno);
10532 else if (GET_MODE (x) <= GET_MODE (to))
10533 result = gen_lowpart_common (GET_MODE (x), to);
10535 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10538 return result ? result : x;
10540 else if (GET_CODE (x) == ZERO_EXTEND)
10542 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10543 n_replacements, modify);
10545 if (GET_CODE (new) == CONST_INT)
10547 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10548 new, GET_MODE (XEXP (x, 0)));
10558 fmt = GET_RTX_FORMAT (GET_CODE (x));
10559 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10565 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10566 n_replacements, modify);
10572 else if (fmt[i] == 'E')
10573 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10575 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10576 n_replacements, modify);
10580 XVECEXP (x, i, j) = new;
10588 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10590 enum rtx_code code = TRUNCATE;
10592 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10594 rtx inner = XEXP (x, 0);
10595 enum machine_mode inner_mode = GET_MODE (inner);
10597 if (inner_mode == mode)
10599 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10601 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10602 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10604 code = GET_CODE (x);
10608 return gen_rtx_fmt_e (code, mode, x);
10611 /* called via for_each_rtx after reload, to clean up truncates of
10612 registers that span multiple actual hard registers. */
10614 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10618 if (GET_CODE (x) != TRUNCATE)
10621 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10623 enum machine_mode reg_mode = GET_MODE (reg);
10624 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10625 subreg_lowpart_offset (DImode, reg_mode));
10626 *(int*) n_changes += 1;
10632 /* Load and store depend on the highpart of the address. However,
10633 set_attr_alternative does not give well-defined results before reload,
10634 so we must look at the rtl ourselves to see if any of the feeding
10635 registers is used in a memref. */
10637 /* Called by sh_contains_memref_p via for_each_rtx. */
10639 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10641 return (GET_CODE (*loc) == MEM);
10644 /* Return nonzero iff INSN contains a MEM. */
10646 sh_contains_memref_p (rtx insn)
10648 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10651 /* FNADDR is the MEM expression from a call expander. Return an address
10652 to use in an SHmedia insn pattern. */
10654 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10658 fnaddr = XEXP (fnaddr, 0);
10659 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10660 if (flag_pic && is_sym)
10662 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10664 rtx reg = gen_reg_rtx (Pmode);
10666 /* We must not use GOTPLT for sibcalls, because PIC_REG
10667 must be restored before the PLT code gets to run. */
10669 emit_insn (gen_symGOT2reg (reg, fnaddr));
10671 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10676 fnaddr = gen_sym2PIC (fnaddr);
10677 PUT_MODE (fnaddr, Pmode);
10680 /* If ptabs might trap, make this visible to the rest of the compiler.
10681 We generally assume that symbols pertain to valid locations, but
10682 it is possible to generate invalid symbols with asm or linker tricks.
10683 In a list of functions where each returns its successor, an invalid
10684 symbol might denote an empty list. */
10685 if (!TARGET_PT_FIXED
10686 && (!is_sym || TARGET_INVALID_SYMBOLS)
10687 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10689 rtx tr = gen_reg_rtx (PDImode);
10691 emit_insn (gen_ptabs (tr, fnaddr));
10694 else if (! target_reg_operand (fnaddr, Pmode))
10695 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10700 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10701 enum machine_mode mode, secondary_reload_info *sri)
10705 if (REGCLASS_HAS_FP_REG (class)
10706 && ! TARGET_SHMEDIA
10707 && immediate_operand ((x), mode)
10708 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10709 && mode == SFmode && fldi_ok ()))
10713 sri->icode = CODE_FOR_reload_insf__frn;
10716 sri->icode = CODE_FOR_reload_indf__frn;
10719 /* ??? If we knew that we are in the appropriate mode -
10720 single precision - we could use a reload pattern directly. */
10725 if (class == FPUL_REGS
10726 && ((GET_CODE (x) == REG
10727 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10728 || REGNO (x) == T_REG))
10729 || GET_CODE (x) == PLUS))
10730 return GENERAL_REGS;
10731 if (class == FPUL_REGS && immediate_operand (x, mode))
10733 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10734 return GENERAL_REGS;
10735 sri->icode = CODE_FOR_reload_insi__i_fpul;
10738 if (class == FPSCR_REGS
10739 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10740 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10741 return GENERAL_REGS;
10742 if (REGCLASS_HAS_FP_REG (class)
10744 && immediate_operand (x, mode)
10745 && x != CONST0_RTX (GET_MODE (x))
10746 && GET_MODE (x) != V4SFmode)
10747 return GENERAL_REGS;
10748 if ((mode == QImode || mode == HImode)
10749 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10751 sri->icode = ((mode == QImode)
10752 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10755 if (TARGET_SHMEDIA && class == GENERAL_REGS
10756 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10757 return TARGET_REGS;
10758 } /* end of input-only processing. */
10760 if (((REGCLASS_HAS_FP_REG (class)
10761 && (GET_CODE (x) == REG
10762 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10763 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10764 && TARGET_FMOVD))))
10765 || (REGCLASS_HAS_GENERAL_REG (class)
10766 && GET_CODE (x) == REG
10767 && FP_REGISTER_P (REGNO (x))))
10768 && ! TARGET_SHMEDIA
10769 && (mode == SFmode || mode == SImode))
10771 if ((class == FPUL_REGS
10772 || (REGCLASS_HAS_FP_REG (class)
10773 && ! TARGET_SHMEDIA && mode == SImode))
10774 && (GET_CODE (x) == MEM
10775 || (GET_CODE (x) == REG
10776 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10777 || REGNO (x) == T_REG
10778 || system_reg_operand (x, VOIDmode)))))
10780 if (class == FPUL_REGS)
10781 return GENERAL_REGS;
10784 if ((class == TARGET_REGS
10785 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10786 && !EXTRA_CONSTRAINT_Csy (x)
10787 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10788 return GENERAL_REGS;
10789 if ((class == MAC_REGS || class == PR_REGS)
10790 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10791 && class != REGNO_REG_CLASS (REGNO (x)))
10792 return GENERAL_REGS;
10793 if (class != GENERAL_REGS && GET_CODE (x) == REG
10794 && TARGET_REGISTER_P (REGNO (x)))
10795 return GENERAL_REGS;
10799 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10801 /* This defines the storage for the variable part of a -mboard= option.
10802 It is only required when using the sh-superh-elf target */
10804 const char * boardtype = "7750p2";
10805 const char * osruntime = "bare";