/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
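/* Word indices within a two-word value such as a DImode register pair:
   on a little-endian target the least significant word comes first, so
   for a value held in r4/r5 the most significant word is at r4 + MSW
   (i.e. r5) and the least significant at r4 + LSW (i.e. r4); on a
   big-endian target the order is reversed.  */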
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
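/* For example, applied to a FUNCTION_DECL this yields the decl's own
   DECL_ATTRIBUTES when present, and otherwise falls back to the
   attributes of the decl's type.  */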
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
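/* Rough sketch of the bookkeeping done by the scheduling hooks below: as
   insns are issued, the pressure counters accumulate the per-insn weights,
   e.g. something like
     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
   and high_pressure () compares the running totals against a threshold.  */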
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
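/* Set by sh_encode_section_info () below on the SYMBOL_REF of an SH2A
   function declared with the function_vector attribute, so users of the
   symbol can tell that it names a function-vector entry.  */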
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

static void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
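/* For example, the conditional branch templates later in this file use
   "bt%.\t%l0": the '.' expands to ".s" (or "/s", depending on the
   assembler dialect) when the branch's delay slot has been filled, and to
   nothing otherwise.  */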
static void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;

    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;

    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;

    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;

    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }

    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;

    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;

      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;

    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;

    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt", stream);  break;
	case GE:  case LE:  fputs ("ge", stream);  break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;

    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (GET_CODE (x) == MEM
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (GET_CODE (x) == MEM)
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }
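      /* For instance, with operand (const_int 8), %V prints "#3" (bit 3 is
	 the one bit set); with (const_int -9), i.e. 0xfffffff7, %W likewise
	 prints "#3" (bit 3 is the one bit cleared).  */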
    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;

    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
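/* For instance, a constant-size, 4-byte-aligned copy of 12 bytes falls
   into the bytes == 12 case below and becomes a call to the
   __movmemSI12_i4 helper when TARGET_HARD_SH4 is set.  */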
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (!constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }
  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
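      /* Worked example: for a 72 byte move, bytes / 4 == 18, so
	 final_switch == 16 - (18 % 16) == 14 and while_loop == 0;
	 r6 == 14 gives one trip through the big loop (subtracting 16)
	 and leaves -2 as the final switch index, as described above.  */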
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
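  /* The rewrites above turn, e.g., (GT x -1) into (GE x 0) and unsigned
     (GEU x 1) into (NE x 0): comparisons against zero that the SH can
     test directly (cmp/pz, tst) without first loading the constant into
     a register.  */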
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated.  So far we only have some ad hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
	 That costs 1 cycle more when the first branch can be predicted taken,
	 but saves us mispredicts because only one branch needs prediction.
	 It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
	   */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      /* Operands were possibly modified, but msw_skip doesn't expect this.
	 Always use the original ones.  */
      if (msw_taken != CODE_FOR_nothing)
	{
	  operands[1] = op1h;
	  operands[2] = op2h;
	}

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
	 gen_rtx_SET (VOIDmode, t_reg,
		      gen_rtx_fmt_ee (code, SImode,
				      sh_compare_op0, sh_compare_op1)),
	 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, insn,
			         gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */
      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this_jmp;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this_jmp.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this_jmp.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this_jmp.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this_jmp.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
  this_jmp.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this_jmp.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this_jmp.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
2056 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2057 fill in operands 9 as a label to the successor insn.
2058 We try to use jump threading where possible.
2059 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2060 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2061 follow jmp and bt, if the address is in range. */
output_branchy_insn (enum rtx_code code, const char *templ,
rtx insn, rtx *operands)
rtx next_insn = NEXT_INSN (insn);
if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
rtx src = SET_SRC (PATTERN (next_insn));
if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
/* Following branch not taken.  */
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], next_insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (next_insn))
+ get_attr_length (next_insn));
int offset = (branch_dest (next_insn)
- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
if (offset >= -252 && offset <= 258)
if (GET_CODE (src) == IF_THEN_ELSE)
src = XEXP (src, 1);
operands[9] = gen_label_rtx ();
emit_label_after (operands[9], insn);
INSN_ADDRESSES_NEW (operands[9],
INSN_ADDRESSES (INSN_UID (insn))
+ get_attr_length (insn));
output_ieee_ccmpeq (rtx insn, rtx *operands)
return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",

/* Output the start of the assembler file.  */
sh_file_start (void)
default_file_start ();
/* Declare the .directive section before it is used.  */
fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
/* We need to show the text section with the proper
   attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
   emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
   will complain.  We can teach GAS specifically about the
   default attributes for our choice of text section, but
   then we would have to change GAS again if/when we change
   the text section name.  */
fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
/* Switch to the data section so that the coffsem symbol
   isn't in the text section.  */
switch_to_section (data_section);
if (TARGET_LITTLE_ENDIAN)
fputs ("\t.little\n", asm_out_file);
if (TARGET_SHCOMPACT)
fputs ("\t.mode\tSHcompact\n", asm_out_file);
else if (TARGET_SHMEDIA)
fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
TARGET_SHMEDIA64 ? 64 : 32);
/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */
unspec_caller_rtx_p (rtx pat)
split_const (pat, &base, &offset);
if (GET_CODE (base) == UNSPEC)
if (XINT (base, 1) == UNSPEC_CALLER)
for (i = 0; i < XVECLEN (base, 0); i++)
if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))

/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */
sh_cannot_copy_insn_p (rtx insn)
if (!reload_completed || !flag_pic)
if (GET_CODE (insn) != INSN)
if (asm_noperands (insn) >= 0)
pat = PATTERN (insn);
if (GET_CODE (pat) != SET)
pat = SET_SRC (pat);
if (unspec_caller_rtx_p (pat))
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
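/* Illustrative example (not from the original sources): a constant left
   shift by 6 costs shift_insns[6] == 3 insns and is emitted as the
   shift_amounts[6] sequence {2, 2, 2}, i.e. three shll2 instructions.
   A shift by 13 uses shift_amounts[13] == {8, 2, 1, 2}: shll8, shll2,
   shll, shll2 - the one-bit shift sits in the middle so that the first
   and last insns stay eligible for branch delay slots.  */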
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */
static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
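/* Illustrative example (not from the original sources): a negative amount
   denotes a logical right shift.  ext_shift_amounts[6] is {8, -2},
   i.e. shll8 followed by shlr2, which nets a left shift by 6 in only
   ext_shift_insns[6] == 2 insns but loses/zeroes the topmost bits -
   acceptable when a following sign or zero extension will discard them
   anyway.  */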
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
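/* Illustrative note (not from the original sources): ((n) | 8) == 15 holds
   exactly for n == 7 and n == 15, the two entries whose sequences end in a
   one-bit right shift ({8, -1} and {16, -1}); that final shlr can be turned
   into a shar, which preserves the sign bit instead of clobbering it.  */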
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */
shift_insns_rtx (rtx insn)
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
int shift_count = INTVAL (XEXP (set_src, 1));
enum rtx_code shift_code = GET_CODE (set_src);
return ashiftrt_insns[shift_count];
return shift_insns[shift_count];

/* Return the cost of a shift.  */
if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
if (GET_MODE (x) == DImode
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == 1)
/* Everything else is invalid, because there is no pattern for it.  */
/* If shifting by a non-constant, then this will be expensive.  */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return SH_DYNAMIC_SHIFT_COST;
value = INTVAL (XEXP (x, 1));
/* Otherwise, return the true cost in instructions.  */
if (GET_CODE (x) == ASHIFTRT)
int cost = ashiftrt_insns[value];
/* If SH3, then we put the constant in a reg and use shad.  */
if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
cost = 1 + SH_DYNAMIC_SHIFT_COST;
return shift_insns[value];
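/* Illustrative example (not from the original sources): an arithmetic right
   shift by 6 would need ashiftrt_insns[6] == 8 one-bit shifts, so where a
   dynamic shift exists (e.g. shad on SH3) the cost is capped at
   1 + SH_DYNAMIC_SHIFT_COST, i.e. a constant load plus one shad.  */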
/* Return the cost of an AND operation.  */
/* ANDing with a register is a single-cycle and instruction.  */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
i = INTVAL (XEXP (x, 1));
if (satisfies_constraint_I10 (XEXP (x, 1))
|| satisfies_constraint_J16 (XEXP (x, 1)))
return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
/* These constants are single cycle extu.[bw] instructions.  */
if (i == 0xff || i == 0xffff)
/* Constants that can be used in an and immediate instruction in a single
   cycle, but this requires r0, so make it a little more expensive.  */
if (CONST_OK_FOR_K08 (i))
/* Constants that can be loaded with a mov immediate and an and.
   This case is probably unnecessary.  */
if (CONST_OK_FOR_I08 (i))
/* Any other constant requires a 2 cycle pc-relative load plus an and.
   This case is probably unnecessary.  */
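/* Illustrative example (not from the original sources): an AND with 0xff
   or 0xffff becomes a single extu.b / extu.w; an 8-bit constant such as
   0x7f can use "and #imm,r0" in one cycle but ties up r0, so it is costed
   slightly higher; anything else, e.g. 0x12345, must first be loaded with
   a pc-relative move and is costed higher still.  */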
/* Return the cost of an addition or a subtraction.  */
/* Adding a register is a single cycle insn.  */
if (GET_CODE (XEXP (x, 1)) == REG
|| GET_CODE (XEXP (x, 1)) == SUBREG)
/* Likewise for small constants.  */
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
switch (GET_CODE (XEXP (x, 1)))
return TARGET_SHMEDIA64 ? 5 : 3;
if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
/* Any other constant requires a 2 cycle pc-relative load plus an
   addition.  */
/* Return the cost of a multiply.  */
multcosts (rtx x ATTRIBUTE_UNUSED)
if (sh_multcost >= 0)
/* ??? We have a mul insn, but it has a latency of three, and doesn't
   accept constants.  Ideally, we would use a cost of one or two and
   add the cost of the operand, but disregard the latter when inside loops
   and loop invariant code motion is still to follow.
   Using a multiply first and splitting it later if it's a loss
   doesn't work because of different sign / zero extension semantics
   of multiplies vs. shifts.  */
return TARGET_SMALLCODE ? 2 : 3;
/* We have a mul insn, so we can never take more than the mul and the
   read of the mac reg, but count more because of the latency and extra
   reg usage.  */
if (TARGET_SMALLCODE)
/* If we're aiming at small code, then just count the number of
   insns in a multiply call sequence.  */
if (TARGET_SMALLCODE)
/* Otherwise count all the insns in the routine we'd be calling too.  */
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
sh_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed ATTRIBUTE_UNUSED)
if (INTVAL (x) == 0)
else if (outer_code == AND && and_operand ((x), DImode))
else if ((outer_code == IOR || outer_code == XOR
|| outer_code == PLUS)
&& CONST_OK_FOR_I10 (INTVAL (x)))
else if (CONST_OK_FOR_I16 (INTVAL (x)))
*total = COSTS_N_INSNS (outer_code != SET);
else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
*total = COSTS_N_INSNS ((outer_code != SET) + 1);
else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
*total = COSTS_N_INSNS ((outer_code != SET) + 2);
*total = COSTS_N_INSNS ((outer_code != SET) + 3);
if (CONST_OK_FOR_I08 (INTVAL (x)))
else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
&& CONST_OK_FOR_K08 (INTVAL (x)))
/* prepare_cmp_insn will force costly constants into registers before
   the cbranch[sd]i4 patterns can see them, so preserve potentially
   interesting ones not covered by I08 above.  */
else if (outer_code == COMPARE
&& ((unsigned HOST_WIDE_INT) INTVAL (x)
== (unsigned HOST_WIDE_INT) 0x7fffffff + 1
|| INTVAL (x) == 0x7fffffff
|| INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
if (TARGET_SHMEDIA64)
*total = COSTS_N_INSNS (4);
else if (TARGET_SHMEDIA32)
*total = COSTS_N_INSNS (2);
*total = COSTS_N_INSNS (4);
/* prepare_cmp_insn will force costly constants into registers before
   the cbranchdi4 pattern can see them, so preserve potentially
   interesting ones.  */
else if (outer_code == COMPARE && GET_MODE (x) == DImode)
if (x == CONST0_RTX (GET_MODE (x)))
else if (sh_1el_vec (x, VOIDmode))
*total = outer_code != SET;
if (sh_rep_vec (x, VOIDmode))
*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+ (outer_code != SET));
*total = COSTS_N_INSNS (3) + (outer_code != SET);
*total = COSTS_N_INSNS (addsubcosts (x));
*total = COSTS_N_INSNS (andcosts (x));
*total = COSTS_N_INSNS (multcosts (x));
*total = COSTS_N_INSNS (shiftcosts (x));
*total = COSTS_N_INSNS (20);
if (sh_1el_vec (x, VOIDmode))
*total = outer_code != SET;
if (sh_rep_vec (x, VOIDmode))
*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+ (outer_code != SET));
*total = COSTS_N_INSNS (3) + (outer_code != SET);
/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */
sh_address_cost (rtx X,
bool speed ATTRIBUTE_UNUSED)
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))
&& ! TARGET_SHMEDIA ? 1 : 0);
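/* Illustrative example (not from the original sources): an indexed access
   such as @(r0,r5) matches the PLUS-with-register case above and is costed
   1, while @(8,r5) (PLUS with a constant) or plain @r5 cost 0; on SH only
   r0 can serve as the index register, hence the extra pressure noted
   above.  */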
/* Code to expand a shift.  */
gen_ashift (int type, int n, rtx reg)
/* Negative values here come from the shift_amounts array.  */
emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));

/* Same for HImode.  */
gen_ashift_hi (int type, int n, rtx reg)
/* Negative values here come from the shift_amounts array.  */
/* We don't have HImode right shift operations because using the
   ordinary 32 bit shift instructions for that doesn't generate proper
   zero/sign extension.
   gen_ashift_hi is only called in contexts where we know that the
   sign extension works out correctly.  */
if (GET_CODE (reg) == SUBREG)
offset = SUBREG_BYTE (reg);
reg = SUBREG_REG (reg);
gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */
gen_shifty_op (int code, rtx *operands)
int value = INTVAL (operands[2]);
/* Truncate the shift count in case it is out of bounds.  */
value = value & 0x1f;
if (code == LSHIFTRT)
emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
emit_insn (gen_movt (operands[0]));
else if (code == ASHIFT)
/* There is a two instruction sequence for 31 bit left shifts,
   but it requires r0.  */
if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
else if (value == 0)
/* This can happen even when optimizing, if there were subregs before
   reload.  Don't output a nop here, as this is never optimized away;
   use a no-op move instead.  */
emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
max = shift_insns[value];
for (i = 0; i < max; i++)
gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */
gen_shifty_hi_op (int code, rtx *operands)
int value = INTVAL (operands[2]);
void (*gen_fun) (int, int, rtx);
/* This operation is used by and_shl for SImode values with a few
   high bits known to be cleared.  */
emit_insn (gen_nop ());
gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
max = ext_shift_insns[value];
for (i = 0; i < max; i++)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* When shifting right, emit the shifts in reverse order, so that
   solitary negative values come first.  */
for (i = ext_shift_insns[value] - 1; i >= 0; i--)
gen_fun (code, ext_shift_amounts[value][i], operands[0]);
/* Output RTL for an arithmetic right shift.  */
/* ??? Rewrite to use super-optimizer sequences.  */
expand_ashiftrt (rtx *operands)
if (GET_CODE (operands[2]) != CONST_INT)
rtx count = copy_to_mode_reg (SImode, operands[2]);
emit_insn (gen_negsi2 (count, count));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
> 1 + SH_DYNAMIC_SHIFT_COST)
= force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
if (GET_CODE (operands[2]) != CONST_INT)
value = INTVAL (operands[2]) & 31;
/* If we are called from abs expansion, arrange things so that we
   can use a single MT instruction that doesn't clobber the source,
   if LICM can hoist out the load of the constant zero.  */
if (currently_expanding_to_rtl)
emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
emit_insn (gen_mov_neg_si_t (operands[0]));
emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
else if (value >= 16 && value <= 19)
wrk = gen_reg_rtx (SImode);
emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
/* Expand a short sequence inline; for longer ones, call a magic
   routine.  */
else if (value <= 5)
wrk = gen_reg_rtx (SImode);
emit_move_insn (wrk, operands[1]);
gen_ashift (ASHIFTRT, 1, wrk);
emit_move_insn (operands[0], wrk);
wrk = gen_reg_rtx (Pmode);
/* Load the value into an arg reg and call a helper.  */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
function_symbol (wrk, func, SFUNC_STATIC);
emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));

sh_dynamicalize_shift_p (rtx count)
return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
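/* Illustrative example (not from the original sources; assumes
   SH_DYNAMIC_SHIFT_COST is 1, as on some SH4-class configurations):
   a shift by 6 is dynamicalized, since shift_insns[6] == 3 > 2 -
   loading the count into a register and using one shld beats three
   shll2 insns.  */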
/* Try to find a good way to implement the combiner pattern
   [(set (match_operand:SI 0 "register_operand" "r")
   (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
   (match_operand:SI 2 "const_int_operand" "n"))
   (match_operand:SI 3 "const_int_operand" "n")))].
   LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
   return 0 for simple right / left or left/right shift combination.
   return 1 for a combination of shifts with zero_extend.
   return 2 for a combination of shifts with an AND that needs r0.
   return 3 for a combination of shifts with an AND that needs an extra
     scratch register, when the three highmost bits of the AND mask are
     clear.
   return 4 for a combination of shifts with an AND that needs an extra
     scratch register, when any of the three highmost bits of the AND mask
     is set.
   If ATTRP is set, store an initial right shift width in ATTRP[0],
   and the instruction length in ATTRP[1].  These values are not valid
   when not optimizing.
   When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
   shift_amounts for the last shift value that is to be used before the
   sign extend.  */
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
int left = INTVAL (left_rtx), right;
int cost, best_cost = 10000;
int best_right = 0, best_len = 0;
if (left < 0 || left > 31)
if (GET_CODE (mask_rtx) == CONST_INT)
mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
/* Can this be expressed as a right shift / left shift pair?  */
lsb = ((mask ^ (mask - 1)) >> 1) + 1;
right = exact_log2 (lsb);
mask2 = ~(mask + lsb - 1);
lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
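/* Illustrative walk-through (not from the original sources): for
   mask == 0x0ff0, mask ^ (mask - 1) == 0x1f, so lsb == 0x10 and
   right == 4; mask2 == ~0x0fff, whose own lsb2 is 0x1000, and since
   mask2 == ~(lsb2 - 1) the mask is one contiguous run of ones - it can
   be matched by a right shift of 4 followed by a left shift.  */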
/* mask has no zeroes but trailing zeroes <==> ! mask2  */
best_cost = shift_insns[right] + shift_insns[right + left];
/* mask has no trailing zeroes <==> ! right  */
else if (! right && mask2 == ~(lsb2 - 1))
int late_right = exact_log2 (lsb2);
best_cost = shift_insns[left + late_right] + shift_insns[late_right];
/* Try to use zero extend.  */
if (mask2 == ~(lsb2 - 1))
for (width = 8; width <= 16; width += 8)
/* Can we zero-extend right away?  */
if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
if (cost < best_cost)
/* ??? Could try to put zero extend into initial right shift,
   or even shift a bit left before the right shift.  */
/* Determine value of first part of left shift, to get to the
   zero extend cut-off point.  */
first = width - exact_log2 (lsb2) + right;
if (first >= 0 && right + left - first >= 0)
cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ ext_shift_insns[right + left - first];
if (cost < best_cost)
/* Try to use the r0 AND pattern.  */
for (i = 0; i <= 2; i++)
if (! CONST_OK_FOR_K08 (mask >> i))
cost = (i != 0) + 2 + ext_shift_insns[left + i];
if (cost < best_cost)
best_len = cost - 1;
/* Try to use a scratch register to hold the AND operand.  */
can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
for (i = 0; i <= 2; i++)
cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
if (cost < best_cost)
best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
attrp[0] = best_right;
attrp[1] = best_len;
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
shl_and_length (rtx insn)
rtx set_src, left_rtx, mask_rtx;
set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
left_rtx = XEXP (XEXP (set_src, 0), 1);
mask_rtx = XEXP (set_src, 1);
shl_and_kind (left_rtx, mask_rtx, attributes);
return attributes[1];

/* This is used in the length attribute of the and_shl_scratch
   instruction.  */
shl_and_scr_length (rtx insn)
rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
int len = shift_insns[INTVAL (XEXP (set_src, 1))];
rtx op = XEXP (set_src, 0);
len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
op = XEXP (XEXP (op, 0), 0);
return len + shift_insns[INTVAL (XEXP (op, 1))];
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
unsigned HOST_WIDE_INT mask;
int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
int right, total_shift;
void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
right = attributes[0];
total_shift = INTVAL (left_rtx) + right;
mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
int first = attributes[2];
emit_insn ((mask << right) <= 0xff
? gen_zero_extendqisi2 (dest,
gen_lowpart (QImode, source))
: gen_zero_extendhisi2 (dest,
gen_lowpart (HImode, source)));
emit_insn (gen_movsi (dest, source));
operands[2] = GEN_INT (right);
gen_shifty_hi_op (LSHIFTRT, operands);
operands[2] = GEN_INT (first);
gen_shifty_hi_op (ASHIFT, operands);
total_shift -= first;
emit_insn (mask <= 0xff
? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
: gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
if (total_shift > 0)
operands[2] = GEN_INT (total_shift);
gen_shifty_hi_op (ASHIFT, operands);
shift_gen_fun = gen_shifty_op;
/* If the topmost bit that matters is set, set the topmost bits
   that don't matter.  This way, we might be able to get a shorter
   signed constant.  */
if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
/* Don't expand fine-grained when combining, because that will
   make the pattern fail.  */
if (currently_expanding_to_rtl
|| reload_in_progress || reload_completed)
/* Cases 3 and 4 should be handled by this split
   only while combining.  */
gcc_assert (kind <= 2);
emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
operands[2] = GEN_INT (total_shift);
shift_gen_fun (ASHIFT, operands);
if (kind != 4 && total_shift < 16)
neg = -ext_shift_amounts[total_shift][1];
neg -= ext_shift_amounts[total_shift][2];
emit_insn (gen_and_shl_scratch (dest, source,
GEN_INT (total_shift + neg),
emit_insn (gen_movsi (dest, dest));
/* Try to find a good way to implement the combiner pattern
   [(set (match_operand:SI 0 "register_operand" "=r")
   (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
   (match_operand:SI 2 "const_int_operand" "n")
   (match_operand:SI 3 "const_int_operand" "n")
   (clobber (reg:SI T_REG))]
   LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
   return 0 for simple left / right shift combination.
   return 1 for left shift / 8 bit sign extend / left shift.
   return 2 for left shift / 16 bit sign extend / left shift.
   return 3 for left shift / 8 bit sign extend / shift / sign extend.
   return 4 for left shift / 16 bit sign extend / shift / sign extend.
   return 5 for left shift / 16 bit sign extend / right shift.
   return 6 for < 8 bit sign extend / left shift.
   return 7 for < 8 bit sign extend / left shift / single right shift.
   If COSTP is nonzero, assign the calculated cost to *COSTP.  */
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
int left, size, insize, ext;
int cost = 0, best_cost;
left = INTVAL (left_rtx);
size = INTVAL (size_rtx);
insize = size - left;
gcc_assert (insize > 0);
/* Default to left / right shift.  */
best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
/* 16 bit shift / sign extend / 16 bit shift  */
cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
/* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
   below, by alternative 3 or something even better.  */
if (cost < best_cost)
/* Try a plain sign extend between two shifts.  */
for (ext = 16; ext >= insize; ext -= 8)
cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
if (cost < best_cost)
kind = ext / (unsigned) 8;
/* Check if we can do a sloppy shift with a final signed shift
   restoring the sign.  */
if (EXT_SHIFT_SIGNED (size - ext))
cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
/* If not, maybe it's still cheaper to do the second shift sloppy,
   and do a final sign extend?  */
else if (size <= 16)
cost = ext_shift_insns[ext - insize] + 1
+ ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
if (cost < best_cost)
kind = ext / (unsigned) 8 + 2;
/* Check if we can sign extend in r0.  */
cost = 3 + shift_insns[left];
if (cost < best_cost)
/* Try the same with a final signed shift.  */
cost = 3 + ext_shift_insns[left + 1] + 1;
if (cost < best_cost)
/* Try to use a dynamic shift.  */
cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
if (cost < best_cost)
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */
shl_sext_length (rtx insn)
rtx set_src, left_rtx, size_rtx;
set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
left_rtx = XEXP (XEXP (set_src, 0), 1);
size_rtx = XEXP (set_src, 1);
shl_sext_kind (left_rtx, size_rtx, &cost);

/* Generate rtl for this pattern.  */
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
int left, size, insize, cost;
kind = shl_sext_kind (left_rtx, size_rtx, &cost);
left = INTVAL (left_rtx);
size = INTVAL (size_rtx);
insize = size - left;
int ext = kind & 1 ? 8 : 16;
int shift2 = size - ext;
/* Don't expand fine-grained when combining, because that will
   make the pattern fail.  */
if (! currently_expanding_to_rtl
&& ! reload_in_progress && ! reload_completed)
emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
emit_insn (gen_movsi (dest, source));
emit_insn (gen_movsi (dest, source));
operands[2] = GEN_INT (ext - insize);
gen_shifty_hi_op (ASHIFT, operands);
? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
: gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
operands[2] = GEN_INT (shift2);
gen_shifty_op (ASHIFT, operands);
if (EXT_SHIFT_SIGNED (shift2))
operands[2] = GEN_INT (shift2 + 1);
gen_shifty_op (ASHIFT, operands);
operands[2] = const1_rtx;
gen_shifty_op (ASHIFTRT, operands);
operands[2] = GEN_INT (shift2);
gen_shifty_hi_op (ASHIFT, operands);
operands[2] = GEN_INT (-shift2);
gen_shifty_hi_op (LSHIFTRT, operands);
emit_insn (size <= 8
? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
: gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
if (! currently_expanding_to_rtl
&& ! reload_in_progress && ! reload_completed)
emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
operands[2] = GEN_INT (16 - insize);
gen_shifty_hi_op (ASHIFT, operands);
emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
/* Don't use gen_ashrsi3 because it generates new pseudos.  */
gen_ashift (ASHIFTRT, 1, dest);
/* Don't expand fine-grained when combining, because that will
   make the pattern fail.  */
if (! currently_expanding_to_rtl
&& ! reload_in_progress && ! reload_completed)
emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
emit_insn (gen_movsi (dest, source));
emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
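/* Illustrative note (not from the original sources): the and/xor/add
   triple above is the classic branchless sign extension of an insize-bit
   field: ((x & (2^insize - 1)) ^ 2^(insize-1)) - 2^(insize-1) copies bit
   insize-1 into all higher bits.  */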
operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
gen_shifty_op (ASHIFT, operands);
emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
/* Prefix a symbol_ref name with "datalabel".  */
gen_datalabel_ref (rtx sym)
if (GET_CODE (sym) == LABEL_REF)
return gen_rtx_CONST (GET_MODE (sym),
gen_rtx_UNSPEC (GET_MODE (sym),
gcc_assert (GET_CODE (sym) == SYMBOL_REF);
str = XSTR (sym, 0);
/* Share all SYMBOL_REF strings with the same value - that is important
   for cse.  */
str = IDENTIFIER_POINTER (get_identifier (str));
XSTR (sym, 0) = str;

static alloc_pool label_ref_list_pool;

typedef struct label_ref_list_d
struct label_ref_list_d *next;
} *label_ref_list_t;
/* The SH cannot load a large constant into a register, constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make things bigger.

   Worst case code looks like:

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   scan, find an instruction which needs a pcrel move.  Look forward, find
   the last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   Then the second move becomes the target for the shortening process.  */

rtx value; /* Value in table.  */
rtx label; /* Label of value.  */
label_ref_list_t wend; /* End of window.  */
enum machine_mode mode; /* Mode of value.  */

/* True if this constant is accessed as part of a post-increment
   sequence.  Note that HImode constants are never accessed in this
   way.  */
bool part_of_sequence_p;

/* The maximum number of constants that can fit into one pool, since
   constants in the range 0..510 are at least 2 bytes long, and in the
   range from there to 1018 at least 4 bytes.  */

#define MAX_POOL_SIZE 372
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
static rtx pool_window_label;
static int pool_window_last;

static int max_labelno_before_reorg;

/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   it?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */
/* Add a constant to the pool and return its label.  */
add_constant (rtx x, enum machine_mode mode, rtx last_value)
label_ref_list_t ref, newref;
/* First see if we've already got it.  */
for (i = 0; i < pool_size; i++)
if (x->code == pool_vector[i].value->code
&& mode == pool_vector[i].mode)
if (x->code == CODE_LABEL)
if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
if (rtx_equal_p (x, pool_vector[i].value))
|| ! rtx_equal_p (last_value, pool_vector[i-1].value))
new_rtx = gen_label_rtx ();
LABEL_REFS (new_rtx) = pool_vector[i].label;
pool_vector[i].label = lab = new_rtx;
if (lab && pool_window_label)
newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
newref->label = pool_window_label;
ref = pool_vector[pool_window_last].wend;
pool_vector[pool_window_last].wend = newref;
pool_window_label = new_rtx;
pool_window_last = i;
/* Need a new one.  */
pool_vector[pool_size].value = x;
if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
pool_vector[pool_size - 1].part_of_sequence_p = true;
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_vector[pool_size].wend = NULL;
pool_vector[pool_size].part_of_sequence_p = (lab == 0);
if (lab && pool_window_label)
newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
newref->label = pool_window_label;
ref = pool_vector[pool_window_last].wend;
pool_vector[pool_window_last].wend = newref;
pool_window_label = lab;
pool_window_last = pool_size;
/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
   these insns at a 4-byte aligned position.  BARRIER is the barrier
   after which we are to place the table.  */
dump_table (rtx start, rtx barrier)
label_ref_list_t ref;
/* Do two passes, first time dump out the HI sized constants.  */
for (i = 0; i < pool_size; i++)
pool_node *p = &pool_vector[i];
if (p->mode == HImode)
scan = emit_insn_after (gen_align_2 (), scan);
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
for (ref = p->wend; ref; ref = ref->next)
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
else if (p->mode == DFmode)
scan = emit_insn_after (gen_align_4 (), scan);
for (; start != barrier; start = NEXT_INSN (start))
if (GET_CODE (start) == INSN
&& recog_memoized (start) == CODE_FOR_casesi_worker_2)
rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
scan = emit_label_after (lab, scan);
if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
rtx align_insn = NULL_RTX;
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
for (i = 0; i < pool_size; i++)
pool_node *p = &pool_vector[i];
if (align_insn && !p->part_of_sequence_p)
for (lab = p->label; lab; lab = LABEL_REFS (lab))
emit_label_before (lab, align_insn);
emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
for (ref = p->wend; ref; ref = ref->next)
emit_insn_before (gen_consttable_window_end (lab),
delete_insn (align_insn);
align_insn = NULL_RTX;
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_4 (p->value,
need_align = ! need_align;
scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
if (p->mode != HImode)
for (ref = p->wend; ref; ref = ref->next)
scan = emit_insn_after (gen_consttable_window_end (lab),
for (i = 0; i < pool_size; i++)
pool_node *p = &pool_vector[i];
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
if (p->mode != HImode)
for (ref = p->wend; ref; ref = ref->next)
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
scan = emit_insn_after (gen_consttable_end (), scan);
scan = emit_barrier_after (scan);
pool_window_label = NULL_RTX;
pool_window_last = 0;
/* Return nonzero if constant would be an ok source for a
   mov.w instead of a mov.l.  */
return (GET_CODE (src) == CONST_INT
&& INTVAL (src) >= -32768
&& INTVAL (src) <= 32767);

#define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
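/* Illustrative note (not from the original sources): a mova insn has a
   pattern of the form (set (reg) (unspec [(label_ref L)] UNSPEC_MOVA)),
   so MOVA_LABELREF simply pulls the (label_ref L) out of the unspec
   vector; mova_p below relies on exactly this shape.  */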
/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we
   don't need to fix it if the input value is CONST_OK_FOR_I08.  */
broken_move (rtx insn)
if (GET_CODE (insn) == INSN)
rtx pat = PATTERN (insn);
if (GET_CODE (pat) == PARALLEL)
pat = XVECEXP (pat, 0, 0);
if (GET_CODE (pat) == SET
/* We can load any 8-bit value if we don't care what the high
   order bits end up as.  */
&& GET_MODE (SET_DEST (pat)) != QImode
&& (CONSTANT_P (SET_SRC (pat))
/* Match mova_const.  */
|| (GET_CODE (SET_SRC (pat)) == UNSPEC
&& XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
&& GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
&& (fp_zero_operand (SET_SRC (pat))
|| fp_one_operand (SET_SRC (pat)))
/* ??? If this is a -m4 or -m4-single compilation, in general
   we don't know the current setting of fpscr, so disable fldi.
   There is an exception if this was a register-register move
   before reload - and hence it was ascertained that we have
   single precision setting - and in a post-reload optimization
   we changed this to do a constant load.  In that case
   we don't have an r0 clobber, hence we must use fldi.  */
&& (! TARGET_SH4 || TARGET_FMOVD
|| (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
&& GET_CODE (SET_DEST (pat)) == REG
&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
&& GET_MODE (SET_DEST (pat)) == SImode
&& (satisfies_constraint_I20 (SET_SRC (pat))
|| satisfies_constraint_I28 (SET_SRC (pat))))
&& ! satisfies_constraint_I08 (SET_SRC (pat)))
return (GET_CODE (insn) == INSN
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
&& XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
/* Don't match mova_const.  */
&& GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
/* Fix up a mova from a switch that went out of range.  */
fixup_mova (rtx mova)
PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
INSN_CODE (mova) = -1;
rtx lab = gen_label_rtx ();
rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
worker = NEXT_INSN (worker);
&& GET_CODE (worker) != CODE_LABEL
&& GET_CODE (worker) != JUMP_INSN);
} while (GET_CODE (worker) == NOTE
|| recog_memoized (worker) != CODE_FOR_casesi_worker_1);
wpat = PATTERN (worker);
wpat0 = XVECEXP (wpat, 0, 0);
wpat1 = XVECEXP (wpat, 0, 1);
wsrc = SET_SRC (wpat0);
PATTERN (worker) = (gen_casesi_worker_2
(SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
INSN_CODE (worker) = -1;
target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
base = gen_rtx_LABEL_REF (Pmode, lab);
diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
INSN_CODE (mova) = -1;

/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
   *num_mova, and check if the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
int n_addr = 0; /* Initialization to shut up spurious warning.  */
int f_target, n_target = 0; /* Likewise.  */
n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
if (n_addr > n_target || n_addr + 1022 < n_target)
/* Change the mova into a load.
   broken_move will then return true for it.  */
fixup_mova (new_mova);
*first_mova = new_mova;
= INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
> n_target - n_addr)
fixup_mova (*first_mova);
fixup_mova (new_mova);
/* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  */
find_barrier (int num_mova, rtx mova, rtx from)
int leading_mova = num_mova;
rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
/* For HImode: range is 510, add 4 because pc counts from address of
   second instruction after this one, subtract 2 for the jump instruction
   that we may need to emit before the table, subtract 2 for the instruction
   that fills the jump delay slot (in very rare cases, reorg will take an
   instruction from after the constant pool or will leave the delay slot
   empty).  This gives 510.
   For SImode: range is 1020, add 4 because pc counts from address of
   second instruction after this one, subtract 2 in case pc is 2 byte
   aligned, subtract 2 for the jump instruction that we may need to emit
   before the table, subtract 2 for the instruction that fills the jump
   delay slot.  This gives 1018.  */
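/* Arithmetic check (illustrative, not from the original sources):
   510 + 4 - 2 - 2 == 510 for the HImode case and
   1020 + 4 - 2 - 2 - 2 == 1018 for the SImode case; these are the
   values used for hi_limit and si_limit in the scan below.  */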
/* The branch will always be shortened now that the reference address for
   forward branches is the successor address, thus we need no longer make
   adjustments to the [sh]i_limit for -O0.  */
while (from && count_si < si_limit && count_hi < hi_limit)
int inc = get_attr_length (from);
/* If this is a label that existed at the time of the compute_alignments
   call, determine the alignment.  N.B. When find_barrier recurses for
   an out-of-reach mova, we might see labels at the start of previously
   inserted constant tables.  */
if (GET_CODE (from) == CODE_LABEL
&& CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
new_align = 1 << label_to_alignment (from);
else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
new_align = 1 << barrier_align (from);
/* In case we are scanning a constant table because of recursion, check
   for explicit alignments.  If the table is long, we might be forced
   to emit the new table in front of it; the length of the alignment
   might be the last straw.  */
else if (GET_CODE (from) == INSN
&& GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
&& XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
/* When we find the end of a constant table, paste the new constant
   at the end.  That is better than putting it in front because
   this way, we don't need extra alignment for adding a 4-byte-aligned
   mov(a) label to a 2/4 or 8/4 byte aligned table.  */
else if (GET_CODE (from) == INSN
&& GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
&& XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
if (GET_CODE (from) == BARRIER)
found_barrier = from;
/* If we are at the end of the function, or in front of an alignment
   instruction, we need not insert an extra alignment.  We prefer
   this kind of barrier.  */
if (barrier_align (from) > 2)
good_barrier = from;
/* If we are at the end of a hot/cold block, dump the constants
   here.  */
next = NEXT_INSN (from);
&& NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
if (broken_move (from))
enum machine_mode mode;
pat = PATTERN (from);
if (GET_CODE (pat) == PARALLEL)
pat = XVECEXP (pat, 0, 0);
src = SET_SRC (pat);
dst = SET_DEST (pat);
mode = GET_MODE (dst);
/* We must explicitly check the mode, because sometimes the
   front end will generate code to load unsigned constants into
   HImode targets without properly sign extending them.  */
|| (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
/* We put the short constants before the long constants, so
   we must count the length of short constants in the range
   for the long constants.  */
/* ??? This isn't optimal, but is easy to do.  */
/* We dump DF/DI constants before SF/SI ones, because
   the limit is the same, but the alignment requirements
   are higher.  We may waste up to 4 additional bytes
   for alignment, and the DF/DI constant may have
   another SF/SI constant placed before it.  */
if (TARGET_SHCOMPACT
&& (mode == DFmode || mode == DImode))
while (si_align > 2 && found_si + si_align - 2 > count_si)
if (found_si > count_si)
count_si = found_si;
found_si += GET_MODE_SIZE (mode);
si_limit -= GET_MODE_SIZE (mode);
switch (untangle_mova (&num_mova, &mova, from))
case 0: return find_barrier (0, 0, mova);
= good_barrier ? good_barrier : found_barrier;
if (found_si > count_si)
count_si = found_si;
else if (GET_CODE (from) == JUMP_INSN
&& (GET_CODE (PATTERN (from)) == ADDR_VEC
|| GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
&& (prev_nonnote_insn (from)
== XEXP (MOVA_LABELREF (mova), 0))))
if (barrier_align (next_real_insn (from)) == align_jumps_log)
/* We have just passed the barrier in front of the
   ADDR_DIFF_VEC, which is stored in found_barrier.  Since
   the ADDR_DIFF_VEC is accessed as data, just like our pool
   constants, this is a good opportunity to accommodate what
   we have gathered so far.
   If we waited any longer, we could end up at a barrier in
   front of code, which gives worse cache usage for separated
   instruction / data caches.  */
good_barrier = found_barrier;
rtx body = PATTERN (from);
inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
/* For the SH1, we generate alignments even after jumps-around-jumps.  */
else if (GET_CODE (from) == JUMP_INSN
&& ! TARGET_SMALLCODE)
if (new_align > si_align)
si_limit -= (count_si - 1) & (new_align - si_align);
si_align = new_align;
count_si = (count_si + new_align - 1) & -new_align;
if (new_align > hi_align)
hi_limit -= (count_hi - 1) & (new_align - hi_align);
hi_align = new_align;
count_hi = (count_hi + new_align - 1) & -new_align;
from = NEXT_INSN (from);
/* Try as we might, the leading mova is out of range.  Change
   it into a load (which will become a pcload) and retry.  */
return find_barrier (0, 0, mova);
/* Insert the constant pool table before the mova instruction,
   to prevent the mova label reference from going out of range.  */
good_barrier = found_barrier = barrier_before_mova;
if (good_barrier && next_real_insn (found_barrier))
found_barrier = good_barrier;
/* We didn't find a barrier in time to dump our stuff,
   so we'll make one.  */
rtx label = gen_label_rtx ();
/* If we exceeded the range, then we must back up over the last
   instruction we looked at.  Otherwise, we just need to undo the
   NEXT_INSN at the end of the loop.  */
if (PREV_INSN (from) != orig
&& (count_hi > hi_limit || count_si > si_limit))
from = PREV_INSN (PREV_INSN (from));
from = PREV_INSN (from);
/* Walk back to be just before any jump or label.
   Putting it before a label reduces the number of times the branch
   around the constant pool table will be hit.  Putting it before
   a jump makes it more likely that the bra delay slot will be
   filled.  */
while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
|| GET_CODE (from) == CODE_LABEL)
from = PREV_INSN (from);
from = emit_jump_insn_after (gen_jump (label), from);
JUMP_LABEL (from) = label;
LABEL_NUSES (label) = 1;
found_barrier = emit_barrier_after (from);
emit_label_after (label, found_barrier);
return found_barrier;
/* If the instruction INSN is implemented by a special function, and we can
   positively find the register that is used to call the sfunc, and this
   register is not used anywhere else in this instruction - except as the
   destination of a set, return this register; else, return 0.  */
sfunc_uses_reg (rtx insn)
rtx pattern, part, reg_part, reg;
if (GET_CODE (insn) != INSN)
pattern = PATTERN (insn);
if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
part = XVECEXP (pattern, 0, i);
if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
reg = XEXP (reg_part, 0);
for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
part = XVECEXP (pattern, 0, i);
if (part == reg_part || GET_CODE (part) == CLOBBER)
if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
&& GET_CODE (SET_DEST (part)) == REG)
? SET_SRC (part) : part)))
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  */
noncall_uses_reg (rtx reg, rtx insn, rtx *set)
reg2 = sfunc_uses_reg (insn);
if (reg2 && REGNO (reg2) == REGNO (reg))
pattern = single_set (insn);
&& GET_CODE (SET_DEST (pattern)) == REG
&& REGNO (reg) == REGNO (SET_DEST (pattern)))
if (GET_CODE (insn) != CALL_INSN)
/* We don't use rtx_equal_p because we don't care if the mode is
   different.  */
pattern = single_set (insn);
&& GET_CODE (SET_DEST (pattern)) == REG
&& REGNO (reg) == REGNO (SET_DEST (pattern)))
par = PATTERN (insn);
if (GET_CODE (par) == PARALLEL)
for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
part = XVECEXP (par, 0, i);
if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
return reg_mentioned_p (reg, SET_SRC (pattern));
pattern = PATTERN (insn);
if (GET_CODE (pattern) == PARALLEL)
for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
pattern = XVECEXP (pattern, 0, 0);
if (GET_CODE (pattern) == SET)
if (reg_mentioned_p (reg, SET_DEST (pattern)))
/* We don't use rtx_equal_p, because we don't care if the
   mode is different.  */
if (GET_CODE (SET_DEST (pattern)) != REG
|| REGNO (reg) != REGNO (SET_DEST (pattern)))
pattern = SET_SRC (pattern);
if (GET_CODE (pattern) != CALL
|| GET_CODE (XEXP (pattern, 0)) != MEM
|| ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
/* Given X, a pattern of an insn or a part of it, return a mask of used
   general registers.  Bits 0..15 mean that the respective registers
   are used as inputs in the instruction.  Bits 16..31 mean that the
   registers 0..15, respectively, are used as outputs, or are clobbered.
   IS_DEST should be set to 16 if X is the destination of a SET, else
   to 0.  */
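/* Illustrative example (not from the original sources): for the pattern
   (set (reg:SI 1) (reg:SI 4)), regs_used returns
   (1 << 4) | (1 << (1 + 16)) == 0x20010 - bit 4 for the r4 input and
   bit 17 for the r1 output.  */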
regs_used (rtx x, int is_dest)
code = GET_CODE (x);
return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
<< (REGNO (x) + is_dest));
rtx y = SUBREG_REG (x);
if (GET_CODE (y) != REG)
return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
subreg_regno_offset (REGNO (y),
GET_MODE (x)) + is_dest));
return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
/* If there was a return value, it must have been indicated with USE.  */
fmt = GET_RTX_FORMAT (code);
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
used |= regs_used (XVECEXP (x, i, j), is_dest);
else if (fmt[i] == 'e')
used |= regs_used (XEXP (x, i), is_dest);
/* Create an instruction that prevents redirection of a conditional branch
   to the destination of the JUMP with address ADDR.
   If the branch needs to be implemented as an indirect jump, try to find
   a scratch register for it.
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   If any preceding insn that doesn't fit into a delay slot is good enough,
   pass 1.  Pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
   If a blocking instruction is made or recognized, return it.  */
gen_block_redirect (rtx jump, int addr, int need_block)
rtx prev = prev_nonnote_insn (jump);
/* First, check if we already have an instruction that satisfies our
   need.  */
if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
if (GET_CODE (PATTERN (prev)) == USE
|| GET_CODE (PATTERN (prev)) == CLOBBER
|| get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
else if ((need_block &= ~1) < 0)
else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
if (GET_CODE (PATTERN (jump)) == RETURN)
/* Reorg even does nasty things with return insns that cause branches
   to go out of range - see find_end_label and callers.  */
return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
/* We can't use JUMP_LABEL here because it might be undefined
   when not optimizing.  */
dest = XEXP (SET_SRC (PATTERN (jump)), 0);
/* If the branch is out of range, try to find a scratch register for it.  */
&& (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
/* Don't look for the stack pointer as a scratch register,
   it would cause trouble if an interrupt occurred.  */
unsigned attempt = 0x7fff, used;
int jump_left = flag_expensive_optimizations + 1;
/* It is likely that the most recent eligible instruction is wanted for
   the delay slot.  Therefore, find out which registers it uses, and
   try to avoid using them.  */
for (scan = jump; (scan = PREV_INSN (scan)); )
if (INSN_DELETED_P (scan))
code = GET_CODE (scan);
if (code == CODE_LABEL || code == JUMP_INSN)
&& GET_CODE (PATTERN (scan)) != USE
&& GET_CODE (PATTERN (scan)) != CLOBBER
&& get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
attempt &= ~regs_used (PATTERN (scan), 0);
for (used = dead = 0, scan = JUMP_LABEL (jump);
(scan = NEXT_INSN (scan)); )
if (INSN_DELETED_P (scan))
code = GET_CODE (scan);
used |= regs_used (PATTERN (scan), 0);
if (code == CALL_INSN)
used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
dead |= (used >> 16) & ~used;
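/* Illustrative note (not from the original sources): in the regs_used
   encoding, bits 16..31 mark registers that are set or clobbered, so
   (used >> 16) & ~used marks registers written on this path before ever
   being read - safe candidates for a scratch register.  */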
4427 if (code == JUMP_INSN)
4429 if (jump_left-- && simplejump_p (scan))
4430 scan = JUMP_LABEL (scan);
4436 /* Mask out the stack pointer again, in case it was
4437 the only 'free' register we have found. */
4440 /* If the immediate destination is still in range, check for possible
4441 threading with a jump beyond the delay slot insn.
4442 Don't check if we are called recursively; the jump has been or will be
4443 checked in a different invocation then. */
4445 else if (optimize && need_block >= 0)
4447 rtx next = next_active_insn (next_active_insn (dest));
4448 if (next && GET_CODE (next) == JUMP_INSN
4449 && GET_CODE (PATTERN (next)) == SET
4450 && recog_memoized (next) == CODE_FOR_jump_compact)
4452 dest = JUMP_LABEL (next);
4454 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4456 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4462 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4464 /* It would be nice if we could convert the jump into an indirect
4465 jump / far branch right now, and thus expose all constituent
4466 instructions to further optimization. However, reorg uses
4467 simplejump_p to determine if there is an unconditional jump where
4468 it should try to schedule instructions from the target of the
4469 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL.  */
4471 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4472 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4474 /* ??? We would like this to have the scope of the jump, but that
4475 scope will change when a delay slot insn of an inner scope is added.
4476 Hence, after delay slot scheduling, we'll have to expect
4477 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump.  */
4480 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4481 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4484 else if (need_block)
4485 /* We can't use JUMP_LABEL here because it might be undefined
4486 when not optimizing. */
4487 return emit_insn_before (gen_block_branch_redirect
4488 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4493 #define CONDJUMP_MIN -252
4494 #define CONDJUMP_MAX 262
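/* These bounds presumably come from the reach of the SH bt/bf branches:
   a signed 8-bit displacement scaled by 2 and applied to PC + 4 reaches
   roughly -252 .. +258 bytes from the branch insn, with a few bytes of
   slack folded into the constants above.  */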
4497 /* A label (to be placed) in front of the jump
4498 that jumps to our ultimate destination. */
4500 /* Where we are going to insert it if we cannot move the jump any farther,
4501 or the jump itself if we have picked up an existing jump. */
4503 /* The ultimate destination. */
4505 struct far_branch *prev;
4506 /* If the branch has already been created, its address;
4507 else the address of its first prospective user. */
4511 static void gen_far_branch (struct far_branch *);
4512 enum mdep_reorg_phase_e mdep_reorg_phase;
4514 gen_far_branch (struct far_branch *bp)
4516 rtx insn = bp->insert_place;
4518 rtx label = gen_label_rtx ();
4521 emit_label_after (label, insn);
4524 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4525 LABEL_NUSES (bp->far_label)++;
4528 jump = emit_jump_insn_after (gen_return (), insn);
4529 /* Emit a barrier so that reorg knows that any following instructions
4530 are not reachable via a fall-through path.
4531 But don't do this when not optimizing, since we wouldn't suppress the
4532 alignment for the barrier then, and could end up with out-of-range
4533 pc-relative loads. */
4535 emit_barrier_after (jump);
4536 emit_label_after (bp->near_label, insn);
4537 JUMP_LABEL (jump) = bp->far_label;
4538 ok = invert_jump (insn, label, 1);
4541 /* If we are branching around a jump (rather than a return), prevent
4542 reorg from using an insn from the jump target as the delay slot insn -
4543 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4544 and it could cause branches to go out of range. */
4547 (gen_stuff_delay_slot
4548 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4549 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4551 /* Prevent reorg from undoing our splits. */
4552 gen_block_redirect (jump, bp->address += 2, 2);
4555 /* Fix up ADDR_DIFF_VECs. */
4557 fixup_addr_diff_vecs (rtx first)
4561 for (insn = first; insn; insn = NEXT_INSN (insn))
4563 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4565 if (GET_CODE (insn) != JUMP_INSN
4566 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4568 pat = PATTERN (insn);
4569 vec_lab = XEXP (XEXP (pat, 0), 0);
4571 /* Search the matching casesi_jump_2. */
4572 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4574 if (GET_CODE (prev) != JUMP_INSN)
4576 prevpat = PATTERN (prev);
4577 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4579 x = XVECEXP (prevpat, 0, 1);
4580 if (GET_CODE (x) != USE)
4583 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4586 /* FIXME: This is a bug in the optimizer, but it seems harmless
4587 to just avoid panicking.  */
4591 /* Emit the reference label of the braf where it belongs, right after
4592 the casesi_jump_2 (i.e. braf). */
4593 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4594 emit_label_after (braf_label, prev);
4596 /* Fix up the ADDR_DIFF_VEC to be relative
4597 to the reference address of the braf. */
4598 XEXP (XEXP (pat, 0), 0) = braf_label;
4602 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4603 a barrier. Return the base 2 logarithm of the desired alignment. */
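/* For example, a return value of 2 requests 1 << 2 = 4-byte alignment,
   which would typically come out as an ".align 2" directive.  */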
4605 barrier_align (rtx barrier_or_label)
4607 rtx next = next_real_insn (barrier_or_label), pat, prev;
4608 int slot, credit, jump_to_next = 0;
4613 pat = PATTERN (next);
4615 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4618 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4619 /* This is a barrier in front of a constant table. */
4622 prev = prev_real_insn (barrier_or_label);
4623 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4625 pat = PATTERN (prev);
4626 /* If this is a very small table, we want to keep the alignment after
4627 the table to the minimum for proper code alignment. */
4628 return ((TARGET_SMALLCODE
4629 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4630 <= (unsigned) 1 << (CACHE_LOG - 2)))
4631 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4634 if (TARGET_SMALLCODE)
4637 if (! TARGET_SH2 || ! optimize)
4638 return align_jumps_log;
4640 /* When fixing up pcloads, a constant table might be inserted just before
4641 the basic block that ends with the barrier. Thus, we can't trust the
4642 instruction lengths before that. */
4643 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4645 /* Check if there is an immediately preceding branch to the insn beyond
4646 the barrier. We must weigh the cost of discarding useful information
4647 from the current cache line when executing this branch and there is
4648 an alignment, against that of fetching unneeded insns in front of the
4649 branch target when there is no alignment. */
4651 /* There are two delay_slot cases to consider. One is the simple case
4652 where the preceding branch is to the insn beyond the barrier (simple
4653 delay slot filling), and the other is where the preceding branch has
4654 a delay slot that is a duplicate of the insn after the barrier
4655 (fill_eager_delay_slots) and the branch is to the insn after the insn
4656 after the barrier. */
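/* An illustrative sketch of the two shapes (assumed, not verbatim
   compiler output):

     simple case:                 fill_eager_delay_slots case:
         bra    .L1                   bra    .L2
         nop       ! delay slot       mov #1,r0 ! dup of insn at .L1
         <barrier>                    <barrier>
     .L1:                         .L1: mov #1,r0
         ...                      .L2: ...
*/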
4658 /* PREV is presumed to be the JUMP_INSN for the barrier under
4659 investigation. Skip to the insn before it. */
4660 prev = prev_real_insn (prev);
4662 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4663 credit >= 0 && prev && GET_CODE (prev) == INSN;
4664 prev = prev_real_insn (prev))
4667 if (GET_CODE (PATTERN (prev)) == USE
4668 || GET_CODE (PATTERN (prev)) == CLOBBER)
4670 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4672 prev = XVECEXP (PATTERN (prev), 0, 1);
4673 if (INSN_UID (prev) == INSN_UID (next))
4675 /* Delay slot was filled with insn at jump target. */
4682 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4684 credit -= get_attr_length (prev);
4687 && GET_CODE (prev) == JUMP_INSN
4688 && JUMP_LABEL (prev))
4692 || next_real_insn (JUMP_LABEL (prev)) == next
4693 /* If relax_delay_slots() decides NEXT was redundant
4694 with some previous instruction, it will have
4695 redirected PREV's jump to the following insn. */
4696 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4697 /* There is no upper bound on redundant instructions
4698 that might have been skipped, but we must not put an
4699 alignment where none had been before. */
4700 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4702 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4703 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4704 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4706 rtx pat = PATTERN (prev);
4707 if (GET_CODE (pat) == PARALLEL)
4708 pat = XVECEXP (pat, 0, 0);
4709 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4715 return align_jumps_log;
4718 /* If we are inside a phony loop, almost any kind of label can turn up as the
4719 first one in the loop. Aligning a braf label causes incorrect switch
4720 destination addresses; we can detect braf labels because they are
4721 followed by a BARRIER.
4722 Applying loop alignment to small constant or switch tables is a waste
4723 of space, so we suppress this too. */
4725 sh_loop_align (rtx label)
4730 next = next_nonnote_insn (next);
4731 while (next && GET_CODE (next) == CODE_LABEL);
4735 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4736 || recog_memoized (next) == CODE_FOR_consttable_2)
4739 return align_loops_log;
4742 /* Do a final pass over the function, just before delayed branch scheduling.  */
4748 rtx first, insn, mova = NULL_RTX;
4750 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4751 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4753 first = get_insns ();
4754 max_labelno_before_reorg = max_label_num ();
4756 /* We must split call insns before introducing `mova's. If we're
4757 optimizing, they'll have already been split. Otherwise, make
4758 sure we don't split them too late. */
4760 split_all_insns_noflow ();
4765 /* If relaxing, generate pseudo-ops to associate function calls with
4766 the symbols they call. It does no harm to not generate these
4767 pseudo-ops. However, when we can generate them, it enables the
4768 linker to potentially relax the jsr to a bsr, and eliminate the
4769 register load and, possibly, the constant pool entry. */
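/* A sketch of the intended relaxation (assembler syntax approximate):

   .LK:                     ! label emitted at the insn loading the target
       mov.l   .Lcp,r1
       ...
       jsr     @r1          ! call site, annotated with ".uses .LK"
       ...
   .Lcp:
       .long   function

   With the ".uses" annotation the linker may relax the jsr into
   "bsr function" and delete both the register load and the pool
   entry.  */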
4771 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4774 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4775 own purposes. This works because none of the remaining passes
4776 need to look at them.
4778 ??? But it may break in the future. We should use a machine
4779 dependent REG_NOTE, or some other approach entirely. */
4780 for (insn = first; insn; insn = NEXT_INSN (insn))
4786 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4788 remove_note (insn, note);
4792 for (insn = first; insn; insn = NEXT_INSN (insn))
4794 rtx pattern, reg, link, set, scan, dies, label;
4795 int rescan = 0, foundinsn = 0;
4797 if (GET_CODE (insn) == CALL_INSN)
4799 pattern = PATTERN (insn);
4801 if (GET_CODE (pattern) == PARALLEL)
4802 pattern = XVECEXP (pattern, 0, 0);
4803 if (GET_CODE (pattern) == SET)
4804 pattern = SET_SRC (pattern);
4806 if (GET_CODE (pattern) != CALL
4807 || GET_CODE (XEXP (pattern, 0)) != MEM)
4810 reg = XEXP (XEXP (pattern, 0), 0);
4814 reg = sfunc_uses_reg (insn);
4819 if (GET_CODE (reg) != REG)
4822 /* Try scanning backward to find where the register is set. */
4824 for (scan = PREV_INSN (insn);
4825 scan && GET_CODE (scan) != CODE_LABEL;
4826 scan = PREV_INSN (scan))
4828 if (! INSN_P (scan))
4831 if (! reg_mentioned_p (reg, scan))
4834 if (noncall_uses_reg (reg, scan, &set))
4847 /* The register is set at LINK. */
4849 /* We can only optimize the function call if the register is
4850 being set to a symbol. In theory, we could sometimes
4851 optimize calls to a constant location, but the assembler
4852 and linker do not support that at present. */
4853 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4854 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4857 /* Scan forward from LINK to the place where REG dies, and
4858 make sure that the only insns which use REG are
4859 themselves function calls. */
4861 /* ??? This doesn't work for call targets that were allocated
4862 by reload, since there may not be a REG_DEAD note for the register.  */
4866 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4870 /* Don't try to trace forward past a CODE_LABEL if we haven't
4871 seen INSN yet. Ordinarily, we will only find the setting insn
4872 if it is in the same basic block. However,
4873 cross-jumping can insert code labels in between the load and
4874 the call, and can result in situations where a single call
4875 insn may have two targets depending on where we came from. */
4877 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4880 if (! INSN_P (scan))
4883 /* Don't try to trace forward past a JUMP. To optimize
4884 safely, we would have to check that all the
4885 instructions at the jump destination did not use REG. */
4887 if (GET_CODE (scan) == JUMP_INSN)
4890 if (! reg_mentioned_p (reg, scan))
4893 if (noncall_uses_reg (reg, scan, &scanset))
4900 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4902 /* There is a function call to this register other
4903 than the one we are checking. If we optimize
4904 this call, we need to rescan again below. */
4908 /* ??? We shouldn't have to worry about SCANSET here.
4909 We should just be able to check for a REG_DEAD note
4910 on a function call. However, the REG_DEAD notes are
4911 apparently not dependable around libcalls; c-torture
4912 execute/920501-2 is a test case. If SCANSET is set,
4913 then this insn sets the register, so it must have
4914 died earlier. Unfortunately, this will only handle
4915 the cases in which the register is, in fact, set in a later insn.  */
4918 /* ??? We shouldn't have to use FOUNDINSN here.
4919 This dates back to when we used LOG_LINKS to find
4920 the most recent insn which sets the register. */
4924 || find_reg_note (scan, REG_DEAD, reg)))
4933 /* Either there was a branch, or some insn used REG
4934 other than as a function call address. */
4938 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4939 on the insn which sets the register, and on each call insn
4940 which uses the register. In final_prescan_insn we look for
4941 the REG_LABEL_OPERAND notes, and output the appropriate label or pseudo-op.  */
4944 label = gen_label_rtx ();
4945 add_reg_note (link, REG_LABEL_OPERAND, label);
4946 add_reg_note (insn, REG_LABEL_OPERAND, label);
4954 scan = NEXT_INSN (scan);
4956 && ((GET_CODE (scan) == CALL_INSN
4957 && reg_mentioned_p (reg, scan))
4958 || ((reg2 = sfunc_uses_reg (scan))
4959 && REGNO (reg2) == REGNO (reg))))
4960 add_reg_note (scan, REG_LABEL_OPERAND, label);
4962 while (scan != dies);
4968 fixup_addr_diff_vecs (first);
4972 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4973 shorten_branches (first);
4976 /* Scan the function looking for move instructions which have to be
4977 changed to pc-relative loads and insert the literal tables. */
4978 label_ref_list_pool = create_alloc_pool ("label references list",
4979 sizeof (struct label_ref_list_d),
4981 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4982 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4986 /* ??? basic block reordering can move a switch table dispatch
4987 below the switch table. Check if that has happened.
4988 We only have the addresses available when optimizing; but then,
4989 this check shouldn't be needed when not optimizing. */
4990 if (!untangle_mova (&num_mova, &mova, insn))
4996 else if (GET_CODE (insn) == JUMP_INSN
4997 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4999 /* ??? loop invariant motion can also move a mova out of a
5000 loop. Since loop does this code motion anyway, maybe we
5001 should wrap UNSPEC_MOVA into a CONST, so that reload can fix it up.  */
5004 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5005 || (prev_nonnote_insn (insn)
5006 == XEXP (MOVA_LABELREF (mova), 0))))
5013 /* Some code might have been inserted between the mova and
5014 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5015 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5016 total += get_attr_length (scan);
5018 /* The range of a mova is 1020; add 4 because the pc counts from the
5019 address of the second instruction after this one, and subtract 2 in case
5020 the pc is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5021 cancels out with alignment effects of the mova itself. */
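/* I.e. the mova-to-table distance check works out to allowing at most
   1020 + 4 - 2 = 1022 bytes of intervening insns.  */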
5024 /* Change the mova into a load, and restart scanning
5025 there. broken_move will then return true for mova. */
5030 if (broken_move (insn)
5031 || (GET_CODE (insn) == INSN
5032 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5035 /* Scan ahead looking for a barrier to stick the constant table behind.  */
5037 rtx barrier = find_barrier (num_mova, mova, insn);
5038 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5039 int need_aligned_label = 0;
5041 if (num_mova && ! mova_p (mova))
5043 /* find_barrier had to change the first mova into a
5044 pcload; thus, we have to start with this new pcload. */
5048 /* Now find all the moves between the points and modify them. */
5049 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5051 if (GET_CODE (scan) == CODE_LABEL)
5053 if (GET_CODE (scan) == INSN
5054 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5055 need_aligned_label = 1;
5056 if (broken_move (scan))
5058 rtx *patp = &PATTERN (scan), pat = *patp;
5062 enum machine_mode mode;
5064 if (GET_CODE (pat) == PARALLEL)
5065 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5066 src = SET_SRC (pat);
5067 dst = SET_DEST (pat);
5068 mode = GET_MODE (dst);
5070 if (mode == SImode && hi_const (src)
5071 && REGNO (dst) != FPUL_REG)
5076 while (GET_CODE (dst) == SUBREG)
5078 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5079 GET_MODE (SUBREG_REG (dst)),
5082 dst = SUBREG_REG (dst);
5084 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5086 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5088 /* This must be an insn that clobbers r0. */
5089 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5090 XVECLEN (PATTERN (scan), 0)
5092 rtx clobber = *clobberp;
5094 gcc_assert (GET_CODE (clobber) == CLOBBER
5095 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5098 && reg_set_between_p (r0_rtx, last_float_move, scan))
5102 && GET_MODE_SIZE (mode) != 4
5103 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5105 lab = add_constant (src, mode, last_float);
5107 emit_insn_before (gen_mova (lab), scan);
5110 /* There will be a REG_UNUSED note for r0 on
5111 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5112 since otherwise reorg's mark_target_live_regs would not
5113 consider r0 to be used, and we would end up with a delay
5114 slot insn in front of SCAN that clobbers r0.  */
5116 = find_regno_note (last_float_move, REG_UNUSED, 0);
5118 /* If we are not optimizing, then there may not be
5121 PUT_MODE (note, REG_INC);
5123 *last_float_addr = r0_inc_rtx;
5125 last_float_move = scan;
5127 newsrc = gen_const_mem (mode,
5128 (((TARGET_SH4 && ! TARGET_FMOVD)
5129 || REGNO (dst) == FPUL_REG)
5132 last_float_addr = &XEXP (newsrc, 0);
5134 /* Remove the clobber of r0. */
5135 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5136 gen_rtx_SCRATCH (Pmode));
5138 /* This is a mova needing a label. Create it. */
5139 else if (GET_CODE (src) == UNSPEC
5140 && XINT (src, 1) == UNSPEC_MOVA
5141 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5143 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5144 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5145 newsrc = gen_rtx_UNSPEC (SImode,
5146 gen_rtvec (1, newsrc),
5151 lab = add_constant (src, mode, 0);
5152 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5153 newsrc = gen_const_mem (mode, newsrc);
5155 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5156 INSN_CODE (scan) = -1;
5159 dump_table (need_aligned_label ? insn : 0, barrier);
5163 free_alloc_pool (label_ref_list_pool);
5164 for (insn = first; insn; insn = NEXT_INSN (insn))
5165 PUT_MODE (insn, VOIDmode);
5167 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5168 INSN_ADDRESSES_FREE ();
5169 split_branches (first);
5171 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5172 also has an effect on the register that holds the address of the sfunc.
5173 Insert an extra dummy insn in front of each sfunc that pretends to
5174 use this register. */
5175 if (flag_delayed_branch)
5177 for (insn = first; insn; insn = NEXT_INSN (insn))
5179 rtx reg = sfunc_uses_reg (insn);
5183 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5187 /* fpscr is not actually a user variable, but we pretend it is for the
5188 sake of the previous optimization passes, since we want it handled like
5189 one. However, we don't have any debugging information for it, so turn
5190 it into a non-user variable now. */
5192 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5194 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5198 get_dest_uid (rtx label, int max_uid)
5200 rtx dest = next_real_insn (label);
5203 /* This can happen for an undefined label. */
5205 dest_uid = INSN_UID (dest);
5206 /* If this is a newly created branch redirection blocking instruction,
5207 we cannot index the branch_uid or insn_addresses arrays with its
5208 uid. But then, we won't need to, because the actual destination is
5209 the following branch. */
5210 while (dest_uid >= max_uid)
5212 dest = NEXT_INSN (dest);
5213 dest_uid = INSN_UID (dest);
5215 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5220 /* Split condbranches that are out of range. Also add clobbers for
5221 scratch registers that are needed in far jumps.
5222 We do this before delay slot scheduling, so that it can take our
5223 newly created instructions into account. It also allows us to
5224 find branches with common targets more easily. */
5227 split_branches (rtx first)
5230 struct far_branch **uid_branch, *far_branch_list = 0;
5231 int max_uid = get_max_uid ();
5234 /* Find out which branches are out of range. */
5235 shorten_branches (first);
5237 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5238 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5240 for (insn = first; insn; insn = NEXT_INSN (insn))
5241 if (! INSN_P (insn))
5243 else if (INSN_DELETED_P (insn))
5245 /* Shorten_branches would split this instruction again,
5246 so transform it into a note. */
5247 SET_INSN_DELETED (insn);
5249 else if (GET_CODE (insn) == JUMP_INSN
5250 /* Don't mess with ADDR_DIFF_VEC */
5251 && (GET_CODE (PATTERN (insn)) == SET
5252 || GET_CODE (PATTERN (insn)) == RETURN))
5254 enum attr_type type = get_attr_type (insn);
5255 if (type == TYPE_CBRANCH)
5259 if (get_attr_length (insn) > 4)
5261 rtx src = SET_SRC (PATTERN (insn));
5262 rtx olabel = XEXP (XEXP (src, 1), 0);
5263 int addr = INSN_ADDRESSES (INSN_UID (insn));
5265 int dest_uid = get_dest_uid (olabel, max_uid);
5266 struct far_branch *bp = uid_branch[dest_uid];
5268 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5269 the label if the LABEL_NUSES count drops to zero. There is
5270 always a jump_optimize pass that sets these values, but it
5271 proceeds to delete unreferenced code, and then if not
5272 optimizing, to un-delete the deleted instructions, thus
5273 leaving labels with use counts that are too low.  */
5276 JUMP_LABEL (insn) = olabel;
5277 LABEL_NUSES (olabel)++;
5281 bp = (struct far_branch *) alloca (sizeof *bp);
5282 uid_branch[dest_uid] = bp;
5283 bp->prev = far_branch_list;
5284 far_branch_list = bp;
5286 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5287 LABEL_NUSES (bp->far_label)++;
5291 label = bp->near_label;
5292 if (! label && bp->address - addr >= CONDJUMP_MIN)
5294 rtx block = bp->insert_place;
5296 if (GET_CODE (PATTERN (block)) == RETURN)
5297 block = PREV_INSN (block);
5299 block = gen_block_redirect (block,
5301 label = emit_label_after (gen_label_rtx (),
5303 bp->near_label = label;
5305 else if (label && ! NEXT_INSN (label))
5307 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5308 bp->insert_place = insn;
5310 gen_far_branch (bp);
5314 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5316 bp->near_label = label = gen_label_rtx ();
5317 bp->insert_place = insn;
5320 ok = redirect_jump (insn, label, 0);
5325 /* get_attr_length (insn) == 2 */
5326 /* Check if we have a pattern where reorg wants to redirect
5327 the branch to a label from an unconditional branch that is too far away.  */
5329 /* We can't use JUMP_LABEL here because it might be undefined
5330 when not optimizing. */
5331 /* A syntax error might cause beyond to be NULL_RTX. */
5333 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5337 && (GET_CODE (beyond) == JUMP_INSN
5338 || ((beyond = next_active_insn (beyond))
5339 && GET_CODE (beyond) == JUMP_INSN))
5340 && GET_CODE (PATTERN (beyond)) == SET
5341 && recog_memoized (beyond) == CODE_FOR_jump_compact
5343 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5344 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5346 gen_block_redirect (beyond,
5347 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5350 next = next_active_insn (insn);
5352 if ((GET_CODE (next) == JUMP_INSN
5353 || ((next = next_active_insn (next))
5354 && GET_CODE (next) == JUMP_INSN))
5355 && GET_CODE (PATTERN (next)) == SET
5356 && recog_memoized (next) == CODE_FOR_jump_compact
5358 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5359 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5361 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5363 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5365 int addr = INSN_ADDRESSES (INSN_UID (insn));
5368 struct far_branch *bp;
5370 if (type == TYPE_JUMP)
5372 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5373 dest_uid = get_dest_uid (far_label, max_uid);
5376 /* Parse errors can lead to labels outside the insn stream.  */
5378 if (! NEXT_INSN (far_label))
5383 JUMP_LABEL (insn) = far_label;
5384 LABEL_NUSES (far_label)++;
5386 redirect_jump (insn, NULL_RTX, 1);
5390 bp = uid_branch[dest_uid];
5393 bp = (struct far_branch *) alloca (sizeof *bp);
5394 uid_branch[dest_uid] = bp;
5395 bp->prev = far_branch_list;
5396 far_branch_list = bp;
5398 bp->far_label = far_label;
5400 LABEL_NUSES (far_label)++;
5402 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5403 if (addr - bp->address <= CONDJUMP_MAX)
5404 emit_label_after (bp->near_label, PREV_INSN (insn));
5407 gen_far_branch (bp);
5413 bp->insert_place = insn;
5415 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5417 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5420 /* Generate all pending far branches,
5421 and free our references to the far labels. */
5422 while (far_branch_list)
5424 if (far_branch_list->near_label
5425 && ! NEXT_INSN (far_branch_list->near_label))
5426 gen_far_branch (far_branch_list);
5428 && far_branch_list->far_label
5429 && ! --LABEL_NUSES (far_branch_list->far_label))
5430 delete_insn (far_branch_list->far_label);
5431 far_branch_list = far_branch_list->prev;
5434 /* Instruction length information is no longer valid due to the new
5435 instructions that have been generated. */
5436 init_insn_lengths ();
5439 /* Dump out instruction addresses, which is useful for debugging the
5440 constant pool table stuff.
5442 If relaxing, output the label and pseudo-ops used to link together
5443 calls and the instructions which set the registers.  */
5445 /* ??? The addresses printed by this routine for insns are nonsense for
5446 insns which are inside of a sequence where none of the inner insns have
5447 variable length. This is because the second pass of shorten_branches
5448 does not bother to update them. */
5451 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5452 int noperands ATTRIBUTE_UNUSED)
5454 if (TARGET_DUMPISIZE)
5455 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5461 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5466 pattern = PATTERN (insn);
5467 if (GET_CODE (pattern) == PARALLEL)
5468 pattern = XVECEXP (pattern, 0, 0);
5469 switch (GET_CODE (pattern))
5472 if (GET_CODE (SET_SRC (pattern)) != CALL
5473 && get_attr_type (insn) != TYPE_SFUNC)
5475 targetm.asm_out.internal_label
5476 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5479 /* else FALLTHROUGH */
5481 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5482 CODE_LABEL_NUMBER (XEXP (note, 0)));
5492 /* Dump out any constants accumulated in the final pass. These will only be labels.  */
5496 output_jump_label_table (void)
5502 fprintf (asm_out_file, "\t.align 2\n");
5503 for (i = 0; i < pool_size; i++)
5505 pool_node *p = &pool_vector[i];
5507 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5508 CODE_LABEL_NUMBER (p->label));
5509 output_asm_insn (".long %O0", &p->value);
5517 /* A full frame looks like:
5521 [ if current_function_anonymous_args
5534 local-0 <- fp points here. */
5536 /* Number of bytes pushed for anonymous args, used to pass information
5537 between expand_prologue and expand_epilogue. */
5539 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5540 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5541 for an epilogue and a negative value means that it's for a sibcall
5542 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5543 all the registers that are about to be restored, and hence dead. */
5546 output_stack_adjust (int size, rtx reg, int epilogue_p,
5547 HARD_REG_SET *live_regs_mask)
5549 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5552 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5554 /* This test is bogus, as output_stack_adjust is used to re-align the stack.  */
5557 gcc_assert (!(size % align));
5560 if (CONST_OK_FOR_ADD (size))
5561 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5562 /* Try to do it with two partial adjustments; however, we must make
5563 sure that the stack is properly aligned at all times, in case
5564 an interrupt occurs between the two partial adjustments. */
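/* For instance, with size = 100 and a 4-byte stack boundary:
   size / 2 & -align = 50 & -4 = 48, and 100 - 48 = 52; each partial
   adjustment is itself a multiple of the alignment, so the stack stays
   aligned even if an interrupt hits between the two adds.  */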
5565 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5566 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5568 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5569 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5575 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5578 /* If TEMP is invalid, we could temporarily save a general
5579 register to MACL. However, there is currently no need
5580 to handle this case, so just die when we see it. */
5582 || current_function_interrupt
5583 || ! call_really_used_regs[temp] || fixed_regs[temp])
5585 if (temp < 0 && ! current_function_interrupt
5586 && (TARGET_SHMEDIA || epilogue_p >= 0))
5589 COPY_HARD_REG_SET (temps, call_used_reg_set);
5590 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5594 if (crtl->return_rtx)
5596 enum machine_mode mode;
5597 mode = GET_MODE (crtl->return_rtx);
5598 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5599 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5601 for (i = 0; i < nreg; i++)
5602 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5603 if (crtl->calls_eh_return)
5605 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5606 for (i = 0; i <= 3; i++)
5607 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5610 if (TARGET_SHMEDIA && epilogue_p < 0)
5611 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5612 CLEAR_HARD_REG_BIT (temps, i);
5613 if (epilogue_p <= 0)
5615 for (i = FIRST_PARM_REG;
5616 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5617 CLEAR_HARD_REG_BIT (temps, i);
5618 if (cfun->static_chain_decl != NULL)
5619 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5621 temp = scavenge_reg (&temps);
5623 if (temp < 0 && live_regs_mask)
5627 COPY_HARD_REG_SET (temps, *live_regs_mask);
5628 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5629 temp = scavenge_reg (&temps);
5633 rtx adj_reg, tmp_reg, mem;
5635 /* If we reached here, the most likely case is the (sibcall)
5636 epilogue for non-SHmedia. Put a special push/pop sequence
5637 for such a case as a last resort. This looks lengthy but
5638 would not be a problem because it seems to be very
5641 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5644 /* ??? There is still the slight possibility that r4 or
5645 r5 have been reserved as fixed registers or assigned
5646 as global registers, and they change during an
5647 interrupt. There are possible ways to handle this:
5649 - If we are adjusting the frame pointer (r14), we can do
5650 with a single temp register and an ordinary push / pop
5652 - Grab any call-used or call-saved registers (i.e. not
5653 fixed or globals) for the temps we need. We might
5654 also grab r14 if we are adjusting the stack pointer.
5655 If we can't find enough available registers, issue
5656 a diagnostic and die - the user must have reserved
5657 way too many registers.
5658 But since all this is rather unlikely to happen and
5659 would require extra testing, we just die if r4 / r5
5660 are not available. */
5661 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5662 && !global_regs[4] && !global_regs[5]);
5664 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5665 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5666 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5667 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5668 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5669 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5670 emit_move_insn (mem, tmp_reg);
5671 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5672 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5673 emit_move_insn (mem, tmp_reg);
5674 emit_move_insn (reg, adj_reg);
5675 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5676 emit_move_insn (adj_reg, mem);
5677 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5678 emit_move_insn (tmp_reg, mem);
5679 /* Tell flow the insns that pop r4/r5 aren't dead. */
5684 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5686 /* If SIZE is negative, subtract the positive value.
5687 This sometimes allows a constant pool entry to be shared
5688 between prologue and epilogue code. */
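/* For example, with a 1000-byte adjustment the prologue loads the
   constant 1000 and subtracts, while the epilogue loads 1000 and adds,
   so one pool entry for 1000 can serve both.  */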
5691 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5692 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5696 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5697 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5701 = (gen_rtx_EXPR_LIST
5702 (REG_FRAME_RELATED_EXPR,
5703 gen_rtx_SET (VOIDmode, reg,
5704 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5714 RTX_FRAME_RELATED_P (x) = 1;
5718 /* Output RTL to push register RN onto the stack. */
5725 x = gen_push_fpul ();
5726 else if (rn == FPSCR_REG)
5727 x = gen_push_fpscr ();
5728 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5729 && FP_OR_XD_REGISTER_P (rn))
5731 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5733 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5735 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5736 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5738 x = gen_push (gen_rtx_REG (SImode, rn));
5742 = gen_rtx_EXPR_LIST (REG_INC,
5743 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5747 /* Output RTL to pop register RN from the stack. */
5754 x = gen_pop_fpul ();
5755 else if (rn == FPSCR_REG)
5756 x = gen_pop_fpscr ();
5757 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5758 && FP_OR_XD_REGISTER_P (rn))
5760 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5762 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5764 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5765 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5767 x = gen_pop (gen_rtx_REG (SImode, rn));
5771 = gen_rtx_EXPR_LIST (REG_INC,
5772 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5775 /* Generate code to push the regs specified in the mask. */
5778 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5780 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5783 /* Push PR last; this gives better latencies after the prologue, and
5784 candidates for the return delay slot when there are no general
5785 registers pushed. */
5786 for (; i < FIRST_PSEUDO_REGISTER; i++)
5788 /* If this is an interrupt handler, and the SZ bit varies,
5789 and we have to push any floating point register, we need
5790 to switch to the correct precision first. */
5791 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5792 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5794 HARD_REG_SET unsaved;
5797 COMPL_HARD_REG_SET (unsaved, *mask);
5798 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5802 && (i != FPSCR_REG || ! skip_fpscr)
5803 && TEST_HARD_REG_BIT (*mask, i))
5805 /* If the ISR has RESBANK attribute assigned, don't push any of
5806 the following registers - R0-R14, MACH, MACL and GBR. */
5807 if (! (sh_cfun_resbank_handler_p ()
5808 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5816 /* Push banked registers last to improve delay slot opportunities. */
5817 if (interrupt_handler)
5818 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5819 if (TEST_HARD_REG_BIT (*mask, i))
5822 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5823 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
5827 /* Calculate how much extra space is needed to save all callee-saved target registers.
5829 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5832 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5835 int stack_space = 0;
5836 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5838 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5839 if ((! call_really_used_regs[reg] || interrupt_handler)
5840 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5841 /* Leave space to save this target register on the stack,
5842 in case target register allocation wants to use it. */
5843 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5847 /* Decide whether we should reserve space for callee-save target registers,
5848 in case target register allocation wants to use them. REGS_SAVED is
5849 the space, in bytes, that is already required for register saves.
5850 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5853 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5854 HARD_REG_SET *live_regs_mask)
5858 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5861 /* Decide how much space to reserve for callee-save target registers
5862 in case target register allocation wants to use them.
5863 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5866 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5868 if (shmedia_space_reserved_for_target_registers)
5869 return shmedia_target_regs_stack_space (live_regs_mask);
5874 /* Work out the registers which need to be saved, both as a mask and a
5875 count of saved words. Return the count.
5877 If doing a pragma interrupt function, then push all regs used by the
5878 function, and if we call another function (we can tell by looking at PR),
5879 make sure that all the regs it clobbers are safe too. */
5882 calc_live_regs (HARD_REG_SET *live_regs_mask)
5887 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5888 bool nosave_low_regs;
5889 int pr_live, has_call;
5891 attrs = DECL_ATTRIBUTES (current_function_decl);
5892 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5893 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5894 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5895 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5897 CLEAR_HARD_REG_SET (*live_regs_mask);
5898 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5899 && df_regs_ever_live_p (FPSCR_REG))
5900 target_flags &= ~MASK_FPU_SINGLE;
5901 /* If we can avoid a lot of saves by switching to double mode, do that.  */
5902 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5903 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5904 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5905 && (! call_really_used_regs[reg]
5906 || interrupt_handler)
5909 target_flags &= ~MASK_FPU_SINGLE;
5912 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5913 knows how to use it. That means the pseudo originally allocated for
5914 the initial value can become the PR_MEDIA_REG hard register, as seen for
5915 execute/20010122-1.c:test9. */
5917 /* ??? this function is called from initial_elimination_offset, hence we
5918 can't use the result of sh_media_register_for_return here. */
5919 pr_live = sh_pr_n_sets ();
5922 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5923 pr_live = (pr_initial
5924 ? (GET_CODE (pr_initial) != REG
5925 || REGNO (pr_initial) != (PR_REG))
5926 : df_regs_ever_live_p (PR_REG));
5927 /* For SHcompact, if not optimizing, we end up with a memory reference
5928 using the return address pointer for __builtin_return_address even
5929 though there is no actual need to put the PR register on the stack. */
5930 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5932 /* Force PR to be live if the prologue has to call the SHmedia
5933 argument decoder or register saver. */
5934 if (TARGET_SHCOMPACT
5935 && ((crtl->args.info.call_cookie
5936 & ~ CALL_COOKIE_RET_TRAMP (1))
5937 || crtl->saves_all_registers))
5939 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5940 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5942 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5945 ? (/* Need to save all the regs ever live. */
5946 (df_regs_ever_live_p (reg)
5947 || (call_really_used_regs[reg]
5948 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5949 || reg == PIC_OFFSET_TABLE_REGNUM)
5951 || (TARGET_SHMEDIA && has_call
5952 && REGISTER_NATURAL_MODE (reg) == SImode
5953 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5954 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5955 && reg != RETURN_ADDRESS_POINTER_REGNUM
5956 && reg != T_REG && reg != GBR_REG
5957 /* Push fpscr only on targets which have an FPU.  */
5958 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5959 : (/* Only push those regs which are used and need to be saved. */
5962 && crtl->args.info.call_cookie
5963 && reg == PIC_OFFSET_TABLE_REGNUM)
5964 || (df_regs_ever_live_p (reg)
5965 && (!call_really_used_regs[reg]
5966 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5967 || (crtl->calls_eh_return
5968 && (reg == EH_RETURN_DATA_REGNO (0)
5969 || reg == EH_RETURN_DATA_REGNO (1)
5970 || reg == EH_RETURN_DATA_REGNO (2)
5971 || reg == EH_RETURN_DATA_REGNO (3)))
5972 || ((reg == MACL_REG || reg == MACH_REG)
5973 && df_regs_ever_live_p (reg)
5974 && sh_cfun_attr_renesas_p ())
5977 SET_HARD_REG_BIT (*live_regs_mask, reg);
5978 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5980 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5981 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5983 if (FP_REGISTER_P (reg))
5985 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5987 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5988 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5991 else if (XD_REGISTER_P (reg))
5993 /* Must switch to double mode to access these registers. */
5994 target_flags &= ~MASK_FPU_SINGLE;
5998 if (nosave_low_regs && reg == R8_REG)
6001 /* If we have a target register optimization pass after prologue / epilogue
6002 threading, we need to assume all target registers will be live even if they aren't now.  */
6004 if (flag_branch_target_load_optimize2
6005 && TARGET_SAVE_ALL_TARGET_REGS
6006 && shmedia_space_reserved_for_target_registers)
6007 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6008 if ((! call_really_used_regs[reg] || interrupt_handler)
6009 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6011 SET_HARD_REG_BIT (*live_regs_mask, reg);
6012 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6014 /* If this is an interrupt handler, we don't have any call-clobbered
6015 registers we can conveniently use for target register save/restore.
6016 Make sure we save at least one general purpose register when we need
6017 to save target registers. */
6018 if (interrupt_handler
6019 && hard_reg_set_intersect_p (*live_regs_mask,
6020 reg_class_contents[TARGET_REGS])
6021 && ! hard_reg_set_intersect_p (*live_regs_mask,
6022 reg_class_contents[GENERAL_REGS]))
6024 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6025 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6031 /* Code to generate prologue and epilogue sequences */
6033 /* PUSHED is the number of bytes that are being pushed on the
6034 stack for register saves. Return the frame size, padded
6035 appropriately so that the stack stays properly aligned. */
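/* For example, with an 8-byte STACK_BOUNDARY, a 20-byte frame and
   PUSHED = 12: ((20 + 12 + 8 - 1) & -8) - 12 = 32 - 12 = 20, so the
   frame plus the save area occupy an aligned 32 bytes.  */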
6036 static HOST_WIDE_INT
6037 rounded_frame_size (int pushed)
6039 HOST_WIDE_INT size = get_frame_size ();
6040 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6042 return ((size + pushed + align - 1) & -align) - pushed;
6045 /* Choose a call-clobbered target-branch register that remains
6046 unchanged along the whole function. We set it up as the return
6047 value in the prologue. */
6049 sh_media_register_for_return (void)
6054 if (! current_function_is_leaf)
6056 if (lookup_attribute ("interrupt_handler",
6057 DECL_ATTRIBUTES (current_function_decl)))
6059 if (sh_cfun_interrupt_handler_p ())
6062 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6064 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6065 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6071 /* The maximum registers we need to save are:
6072 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6073 - 32 floating point registers (for each pair, we save none,
6074 one single precision value, or a double precision value).
6075 - 8 target registers
6076 - add 1 entry for a delimiter. */
6077 #define MAX_SAVED_REGS (62+32+8)
6079 typedef struct save_entry_s
6088 /* There will be a delimiter entry with VOIDmode both at the start and the
6089 end of a filled in schedule. The end delimiter has the offset of the
6090 save with the smallest (i.e. most negative) offset. */
6091 typedef struct save_schedule_s
6093 save_entry entries[MAX_SAVED_REGS + 2];
6094 int temps[MAX_TEMPS+1];
6097 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6098 use reverse order. Returns the last entry written to (not counting
6099 the delimiter). OFFSET_BASE is a number to be added to all offset
6103 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6107 save_entry *entry = schedule->entries;
6111 if (! current_function_interrupt)
6112 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6113 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6114 && ! FUNCTION_ARG_REGNO_P (i)
6115 && i != FIRST_RET_REG
6116 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6117 && ! (crtl->calls_eh_return
6118 && (i == EH_RETURN_STACKADJ_REGNO
6119 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6120 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6121 schedule->temps[tmpx++] = i;
6123 entry->mode = VOIDmode;
6124 entry->offset = offset_base;
6126 /* We loop twice: first, we save 8-byte aligned registers at the
6127 higher addresses, which are known to be aligned. Then, we
6128 proceed to saving 32-bit registers that don't need 8-byte alignment.
6130 If this is an interrupt function, all registers that need saving
6131 need to be saved in full. Moreover, we need to postpone saving
6132 target registers till we have saved some general purpose registers
6133 we can then use as scratch registers. */
6134 offset = offset_base;
6135 for (align = 1; align >= 0; align--)
6137 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6138 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6140 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6143 if (current_function_interrupt)
6145 if (TARGET_REGISTER_P (i))
6147 if (GENERAL_REGISTER_P (i))
6150 if (mode == SFmode && (i % 2) == 1
6151 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6152 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6159 /* If we're doing the aligned pass and this is not aligned,
6160 or we're doing the unaligned pass and this is aligned, skip it.  */
6162 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6166 if (current_function_interrupt
6167 && GENERAL_REGISTER_P (i)
6168 && tmpx < MAX_TEMPS)
6169 schedule->temps[tmpx++] = i;
6171 offset -= GET_MODE_SIZE (mode);
6174 entry->offset = offset;
6177 if (align && current_function_interrupt)
6178 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6179 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6181 offset -= GET_MODE_SIZE (DImode);
6183 entry->mode = DImode;
6184 entry->offset = offset;
6189 entry->mode = VOIDmode;
6190 entry->offset = offset;
6191 schedule->temps[tmpx] = -1;
6196 sh_expand_prologue (void)
6198 HARD_REG_SET live_regs_mask;
6201 int save_flags = target_flags;
6204 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6206 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6208 /* We have pretend args if we had an object sent partially in registers
6209 and partially on the stack, e.g. a large structure. */
6210 pretend_args = crtl->args.pretend_args_size;
6211 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6212 && (NPARM_REGS(SImode)
6213 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6215 output_stack_adjust (-pretend_args
6216 - crtl->args.info.stack_regs * 8,
6217 stack_pointer_rtx, 0, NULL);
6219 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6220 /* We're going to use the PIC register to load the address of the
6221 incoming-argument decoder and/or of the return trampoline from
6222 the GOT, so make sure the PIC register is preserved and initialized.  */
6224 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6226 if (TARGET_SHCOMPACT
6227 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6231 /* First, make all registers with incoming arguments that will
6232 be pushed onto the stack live, so that register renaming
6233 doesn't overwrite them. */
6234 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6235 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6236 >= NPARM_REGS (SImode) - reg)
6237 for (; reg < NPARM_REGS (SImode); reg++)
6238 emit_insn (gen_shcompact_preserve_incoming_args
6239 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6240 else if (CALL_COOKIE_INT_REG_GET
6241 (crtl->args.info.call_cookie, reg) == 1)
6242 emit_insn (gen_shcompact_preserve_incoming_args
6243 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6245 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6247 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6248 GEN_INT (crtl->args.info.call_cookie));
6249 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6250 gen_rtx_REG (SImode, R0_REG));
6252 else if (TARGET_SHMEDIA)
6254 int tr = sh_media_register_for_return ();
6257 emit_move_insn (gen_rtx_REG (DImode, tr),
6258 gen_rtx_REG (DImode, PR_MEDIA_REG));
6261 /* Emit the code for SETUP_VARARGS. */
6264 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6266 /* Push arg regs as if they'd been provided by the caller on the stack.  */
6267 for (i = 0; i < NPARM_REGS(SImode); i++)
6269 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6272 if (i >= (NPARM_REGS(SImode)
6273 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6281 /* If we're supposed to switch stacks at function entry, do so now. */
6284 /* The argument specifies a variable holding the address of the
6285 stack the interrupt function should switch to/from at entry/exit. */
6287 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6288 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6290 emit_insn (gen_sp_switch_1 (sp_switch));
6293 d = calc_live_regs (&live_regs_mask);
6294 /* ??? Maybe we could save some switching if we can move a mode switch
6295 that already happens to be at the function start into the prologue. */
6296 if (target_flags != save_flags && ! current_function_interrupt)
6297 emit_insn (gen_toggle_sz ());
6301 int offset_base, offset;
6303 int offset_in_r0 = -1;
6305 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6306 int total_size, save_size;
6307 save_schedule schedule;
6311 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6312 && ! current_function_interrupt)
6313 r0 = gen_rtx_REG (Pmode, R0_REG);
6315 /* D is the actual number of bytes that we need for saving registers;
6316 however, in initial_elimination_offset we have committed to using
6317 an additional TREGS_SPACE amount of bytes - in order to keep both
6318 addresses to arguments supplied by the caller and local variables
6319 valid, we must keep this gap. Place it between the incoming
6320 arguments and the actually saved registers in a bid to optimize
6321 locality of reference. */
6322 total_size = d + tregs_space;
6323 total_size += rounded_frame_size (total_size);
6324 save_size = total_size - rounded_frame_size (d);
6325 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6326 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6327 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6329 /* If adjusting the stack in a single step costs nothing extra, do so.
6330 I.e. either if a single addi is enough, or we need a movi anyway,
6331 and we don't exceed the maximum offset range (the test for the
6332 latter is conservative for simplicity). */
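/* (CONST_OK_FOR_I10 is presumed here to be SHmedia's signed 10-bit
   immediate range, -512 .. 511.)  */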
6334 && (CONST_OK_FOR_I10 (-total_size)
6335 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6336 && total_size <= 2044)))
6337 d_rounding = total_size - save_size;
6339 offset_base = d + d_rounding;
6341 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6344 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6345 tmp_pnt = schedule.temps;
6346 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6348 enum machine_mode mode = entry->mode;
6349 unsigned int reg = entry->reg;
6350 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6353 offset = entry->offset;
6355 reg_rtx = gen_rtx_REG (mode, reg);
6357 mem_rtx = gen_frame_mem (mode,
6358 gen_rtx_PLUS (Pmode,
6362 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6369 if (HAVE_PRE_DECREMENT
6370 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6371 || mem_rtx == NULL_RTX
6372 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6374 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6376 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6385 offset += GET_MODE_SIZE (mode);
6389 if (mem_rtx != NULL_RTX)
6392 if (offset_in_r0 == -1)
6394 emit_move_insn (r0, GEN_INT (offset));
6395 offset_in_r0 = offset;
6397 else if (offset != offset_in_r0)
6402 GEN_INT (offset - offset_in_r0)));
6403 offset_in_r0 += offset - offset_in_r0;
6406 if (pre_dec != NULL_RTX)
6412 (Pmode, r0, stack_pointer_rtx));
6416 offset -= GET_MODE_SIZE (mode);
6417 offset_in_r0 -= GET_MODE_SIZE (mode);
6422 mem_rtx = gen_frame_mem (mode, r0);
6424 mem_rtx = gen_frame_mem (mode,
6425 gen_rtx_PLUS (Pmode,
6429 /* We must not use an r0-based address for target-branch
6430 registers or for special registers without pre-dec
6431 memory addresses, since we store their values in r0 first.  */
6433 gcc_assert (!TARGET_REGISTER_P (reg)
6434 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6435 || mem_rtx == pre_dec));
6438 orig_reg_rtx = reg_rtx;
6439 if (TARGET_REGISTER_P (reg)
6440 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6441 && mem_rtx != pre_dec))
6443 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6445 emit_move_insn (tmp_reg, reg_rtx);
6447 if (REGNO (tmp_reg) == R0_REG)
6451 gcc_assert (!refers_to_regno_p
6452 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6455 if (*++tmp_pnt <= 0)
6456 tmp_pnt = schedule.temps;
6463 /* Mark as interesting for dwarf cfi generator */
6464 insn = emit_move_insn (mem_rtx, reg_rtx);
6465 RTX_FRAME_RELATED_P (insn) = 1;
6466 /* If we use an intermediate register for the save, we can't
6467 describe this exactly in cfi as a copy of the to-be-saved
6468 register into the temporary register and then the temporary
6469 register on the stack, because the temporary register can
6470 have a different natural size than the to-be-saved register.
6471 Thus, we gloss over the intermediate copy and pretend we do
6472 a direct save from the to-be-saved register. */
6473 if (REGNO (reg_rtx) != reg)
6477 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6478 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6480 REG_NOTES (insn) = note_rtx;
6483 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6485 rtx reg_rtx = gen_rtx_REG (mode, reg);
6487 rtx mem_rtx = gen_frame_mem (mode,
6488 gen_rtx_PLUS (Pmode,
6492 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6493 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6495 REG_NOTES (insn) = note_rtx;
6500 gcc_assert (entry->offset == d_rounding);
6503 push_regs (&live_regs_mask, current_function_interrupt);
6505 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6506 emit_insn (gen_GOTaddr2picreg ());
6508 if (SHMEDIA_REGS_STACK_ADJUST ())
6510 /* This must NOT go through the PLT, otherwise mach and macl
6511 may be clobbered. */
6512 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6514 ? "__GCC_push_shmedia_regs"
6515 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6516 emit_insn (gen_shmedia_save_restore_regs_compact
6517 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6520 if (target_flags != save_flags && ! current_function_interrupt)
6521 emit_insn (gen_toggle_sz ());
6523 target_flags = save_flags;
6525 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6526 stack_pointer_rtx, 0, NULL);
6528 if (frame_pointer_needed)
6529 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6531 if (TARGET_SHCOMPACT
6532 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6534 /* This must NOT go through the PLT, otherwise mach and macl
6535 may be clobbered. */
6536 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6537 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6538 emit_insn (gen_shcompact_incoming_args ());
6543 sh_expand_epilogue (bool sibcall_p)
6545 HARD_REG_SET live_regs_mask;
6549 int save_flags = target_flags;
6550 int frame_size, save_size;
6551 int fpscr_deferred = 0;
6552 int e = sibcall_p ? -1 : 1;
6554 d = calc_live_regs (&live_regs_mask);
6557 frame_size = rounded_frame_size (d);
6561 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6563 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6564 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6565 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6567 total_size = d + tregs_space;
6568 total_size += rounded_frame_size (total_size);
6569 save_size = total_size - frame_size;
6571 /* If adjusting the stack in a single step costs nothing extra, do so.
6572 I.e. either if a single addi is enough, or we need a movi anyway,
6573 and we don't exceed the maximum offset range (the test for the
6574 latter is conservative for simplicity). */
6576 && ! frame_pointer_needed
6577 && (CONST_OK_FOR_I10 (total_size)
6578 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6579 && total_size <= 2044)))
6580 d_rounding = frame_size;
6582 frame_size -= d_rounding;
6585 if (frame_pointer_needed)
6587 /* We must avoid scheduling the epilogue with previous basic blocks
6588 when exception handling is enabled. See PR/18032. */
6589 if (flag_exceptions)
6590 emit_insn (gen_blockage ());
6591 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6594 /* We must avoid moving the stack pointer adjustment past code
6595 which reads from the local frame, else an interrupt could
6596 occur after the SP adjustment and clobber data in the local
6598 emit_insn (gen_blockage ());
6599 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6601 else if (frame_size)
6603 /* We must avoid moving the stack pointer adjustment past code
6604 which reads from the local frame, else an interrupt could
6605 occur after the SP adjustment and clobber data in the local
6607 emit_insn (gen_blockage ());
6608 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6611 if (SHMEDIA_REGS_STACK_ADJUST ())
6613 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6615 ? "__GCC_pop_shmedia_regs"
6616 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6617 /* This must NOT go through the PLT, otherwise mach and macl
6618 may be clobbered. */
6619 emit_insn (gen_shmedia_save_restore_regs_compact
6620 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6623 /* Pop all the registers. */
6625 if (target_flags != save_flags && ! current_function_interrupt)
6626 emit_insn (gen_toggle_sz ());
6629 int offset_base, offset;
6630 int offset_in_r0 = -1;
6632 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6633 save_schedule schedule;
6637 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6638 offset_base = -entry[1].offset + d_rounding;
6639 tmp_pnt = schedule.temps;
6640 for (; entry->mode != VOIDmode; entry--)
6642 enum machine_mode mode = entry->mode;
6643 int reg = entry->reg;
6644 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6646 offset = offset_base + entry->offset;
6647 reg_rtx = gen_rtx_REG (mode, reg);
6649 mem_rtx = gen_frame_mem (mode,
6650 gen_rtx_PLUS (Pmode,
6654 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6660 if (HAVE_POST_INCREMENT
6661 && (offset == offset_in_r0
6662 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6663 && mem_rtx == NULL_RTX)
6664 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6666 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6668 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6671 post_inc = NULL_RTX;
6680 if (mem_rtx != NULL_RTX)
6683 if (offset_in_r0 == -1)
6685 emit_move_insn (r0, GEN_INT (offset));
6686 offset_in_r0 = offset;
6688 else if (offset != offset_in_r0)
6693 GEN_INT (offset - offset_in_r0)));
6694 offset_in_r0 += offset - offset_in_r0;
6697 if (post_inc != NULL_RTX)
6703 (Pmode, r0, stack_pointer_rtx));
6709 offset_in_r0 += GET_MODE_SIZE (mode);
6712 mem_rtx = gen_frame_mem (mode, r0);
6714 mem_rtx = gen_frame_mem (mode,
6715 gen_rtx_PLUS (Pmode,
6719 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6720 || mem_rtx == post_inc);
6723 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6724 && mem_rtx != post_inc)
6726 insn = emit_move_insn (r0, mem_rtx);
6729 else if (TARGET_REGISTER_P (reg))
6731 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6733 /* Give the scheduler a bit of freedom by using up to
6734 MAX_TEMPS registers in a round-robin fashion. */
6735 insn = emit_move_insn (tmp_reg, mem_rtx);
6738 tmp_pnt = schedule.temps;
6741 insn = emit_move_insn (reg_rtx, mem_rtx);
6744 gcc_assert (entry->offset + offset_base == d + d_rounding);
6746 else /* ! TARGET_SH5 */
6751 /* For an ISR with the RESBANK attribute assigned, don't pop PR
6753 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6754 && !sh_cfun_resbank_handler_p ())
6756 if (!frame_pointer_needed)
6757 emit_insn (gen_blockage ());
6761 /* Banked registers are popped first to avoid being scheduled in the
6762 delay slot. RTE switches banks before the delay-slot instruction. */
6763 if (current_function_interrupt)
6765 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6766 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6767 pop (LAST_BANKED_REG - i);
6769 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6772 last_reg = FIRST_PSEUDO_REGISTER;
6774 for (i = 0; i < last_reg; i++)
6776 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6778 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6779 && hard_reg_set_intersect_p (live_regs_mask,
6780 reg_class_contents[DF_REGS]))
6782 /* For an ISR with the RESBANK attribute assigned, don't pop the
6783 following registers: R0-R14, MACH, MACL and GBR. */
6784 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6785 && ! (sh_cfun_resbank_handler_p ()
6786 && ((j >= FIRST_GENERAL_REG
6787 && j < LAST_GENERAL_REG)
6793 if (j == FIRST_FP_REG && fpscr_deferred)
6797 if (target_flags != save_flags && ! current_function_interrupt)
6798 emit_insn (gen_toggle_sz ());
6799 target_flags = save_flags;
6801 output_stack_adjust (crtl->args.pretend_args_size
6802 + save_size + d_rounding
6803 + crtl->args.info.stack_regs * 8,
6804 stack_pointer_rtx, e, NULL);
6806 if (crtl->calls_eh_return)
6807 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6808 EH_RETURN_STACKADJ_RTX));
6810 /* Switch back to the normal stack if necessary. */
6811 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6812 emit_insn (gen_sp_switch_2 ());
6814 /* Tell flow the insn that pops PR isn't dead. */
6815 /* PR_REG will never be live in SHmedia mode, and we don't need to
6816 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6817 by the return pattern. */
6818 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6819 emit_use (gen_rtx_REG (SImode, PR_REG));
6822 static int sh_need_epilogue_known = 0;
6825 sh_need_epilogue (void)
6827 if (! sh_need_epilogue_known)
6832 sh_expand_epilogue (0);
6833 epilogue = get_insns ();
6835 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6837 return sh_need_epilogue_known > 0;
6840 /* Emit code to change the current function's return address to RA.
6841 TEMP is available as a scratch register, if needed. */
6844 sh_set_return_address (rtx ra, rtx tmp)
6846 HARD_REG_SET live_regs_mask;
6848 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6851 d = calc_live_regs (&live_regs_mask);
6853 /* If pr_reg isn't live, we can set it (or the register given in
6854 sh_media_register_for_return) directly. */
6855 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6861 int rr_regno = sh_media_register_for_return ();
6866 rr = gen_rtx_REG (DImode, rr_regno);
6869 rr = gen_rtx_REG (SImode, pr_reg);
6871 emit_insn (GEN_MOV (rr, ra));
6872 /* Tell flow the register for return isn't dead. */
6880 save_schedule schedule;
6883 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6884 offset = entry[1].offset;
6885 for (; entry->mode != VOIDmode; entry--)
6886 if (entry->reg == pr_reg)
6889 /* We can't find the PR register. */
6893 offset = entry->offset - offset;
6894 pr_offset = (rounded_frame_size (d) + offset
6895 + SHMEDIA_REGS_STACK_ADJUST ());
6898 pr_offset = rounded_frame_size (d);
6900 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6901 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6903 tmp = gen_frame_mem (Pmode, tmp);
6904 emit_insn (GEN_MOV (tmp, ra));
6907 /* Clear variables at function end. */
6910 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6911 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6913 sh_need_epilogue_known = 0;
6917 sh_builtin_saveregs (void)
6919 /* First unnamed integer register. */
6920 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6921 /* Number of integer registers we need to save. */
6922 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6923 /* First unnamed SFmode float reg. */
6924 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6925 /* Number of SFmode float regs to save. */
6926 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6929 alias_set_type alias_set;
6935 int pushregs = n_intregs;
6937 while (pushregs < NPARM_REGS (SImode) - 1
6938 && (CALL_COOKIE_INT_REG_GET
6939 (crtl->args.info.call_cookie,
6940 NPARM_REGS (SImode) - pushregs)
6943 crtl->args.info.call_cookie
6944 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6949 if (pushregs == NPARM_REGS (SImode))
6950 crtl->args.info.call_cookie
6951 |= (CALL_COOKIE_INT_REG (0, 1)
6952 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6954 crtl->args.info.call_cookie
6955 |= CALL_COOKIE_STACKSEQ (pushregs);
6957 crtl->args.pretend_args_size += 8 * n_intregs;
6959 if (TARGET_SHCOMPACT)
6963 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6965 error ("__builtin_saveregs not supported by this subtarget");
6972 /* Allocate block of memory for the regs. */
6973 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6974 Or can assign_stack_local accept a 0 SIZE argument? */
6975 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6978 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6979 else if (n_floatregs & 1)
6983 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6984 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6985 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6986 regbuf = change_address (regbuf, BLKmode, addr);
6988 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6992 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6993 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6994 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6995 emit_insn (gen_andsi3 (addr, addr, mask));
6996 regbuf = change_address (regbuf, BLKmode, addr);
6999 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7000 alias_set = get_varargs_alias_set ();
7001 set_mem_alias_set (regbuf, alias_set);
7004 This is optimized to only save the regs that are necessary. Explicitly
7005 named args need not be saved. */
7007 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7008 adjust_address (regbuf, BLKmode,
7009 n_floatregs * UNITS_PER_WORD),
7013 /* Return the address of the regbuf. */
7014 return XEXP (regbuf, 0);
7017 This is optimized to only save the regs that are necessary. Explicitly
7018 named args need not be saved.
7019 We explicitly build a pointer to the buffer because it halves the insn
7020 count when not optimizing (otherwise the pointer is built for each reg
7022 We emit the moves in reverse order so that we can use predecrement. */
7024 fpregs = copy_to_mode_reg (Pmode,
7025 plus_constant (XEXP (regbuf, 0),
7026 n_floatregs * UNITS_PER_WORD));
7027 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7030 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7032 emit_insn (gen_addsi3 (fpregs, fpregs,
7033 GEN_INT (-2 * UNITS_PER_WORD)));
7034 mem = change_address (regbuf, DFmode, fpregs);
7035 emit_move_insn (mem,
7036 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7038 regno = first_floatreg;
7041 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7042 mem = change_address (regbuf, SFmode, fpregs);
7043 emit_move_insn (mem,
7044 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7045 - (TARGET_LITTLE_ENDIAN != 0)));
7049 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7053 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7054 mem = change_address (regbuf, SFmode, fpregs);
7055 emit_move_insn (mem,
7056 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7059 /* Return the address of the regbuf. */
7060 return XEXP (regbuf, 0);
7063 /* Define the `__builtin_va_list' type for the ABI. */
7066 sh_build_builtin_va_list (void)
7068 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7071 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7072 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7073 return ptr_type_node;
7075 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7077 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7079 f_next_o_limit = build_decl (FIELD_DECL,
7080 get_identifier ("__va_next_o_limit"),
7082 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7084 f_next_fp_limit = build_decl (FIELD_DECL,
7085 get_identifier ("__va_next_fp_limit"),
7087 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7090 DECL_FIELD_CONTEXT (f_next_o) = record;
7091 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7092 DECL_FIELD_CONTEXT (f_next_fp) = record;
7093 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7094 DECL_FIELD_CONTEXT (f_next_stack) = record;
7096 TYPE_FIELDS (record) = f_next_o;
7097 TREE_CHAIN (f_next_o) = f_next_o_limit;
7098 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7099 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7100 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7102 layout_type (record);
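/* A rough C-level sketch of the record built above (the field types in
   the elided build_decl calls are assumed to be pointer types; this is
   only an illustration, not code the compiler consumes):

     struct __va_list
     {
       void *__va_next_o;        -- next integer argument register slot
       void *__va_next_o_limit;  -- end of the integer register save area
       void *__va_next_fp;       -- next FP argument register slot
       void *__va_next_fp_limit; -- end of the FP register save area
       void *__va_next_stack;    -- next argument passed on the stack
     };  */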
7107 /* Implement `va_start' for varargs and stdarg. */
7110 sh_va_start (tree valist, rtx nextarg)
7112 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7113 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7119 expand_builtin_saveregs ();
7120 std_expand_builtin_va_start (valist, nextarg);
7124 if ((! TARGET_SH2E && ! TARGET_SH4)
7125 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7127 std_expand_builtin_va_start (valist, nextarg);
7131 f_next_o = TYPE_FIELDS (va_list_type_node);
7132 f_next_o_limit = TREE_CHAIN (f_next_o);
7133 f_next_fp = TREE_CHAIN (f_next_o_limit);
7134 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7135 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7137 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7139 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7140 valist, f_next_o_limit, NULL_TREE);
7141 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7143 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7144 valist, f_next_fp_limit, NULL_TREE);
7145 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7146 valist, f_next_stack, NULL_TREE);
7148 /* Call __builtin_saveregs. */
7149 u = make_tree (sizetype, expand_builtin_saveregs ());
7150 u = fold_convert (ptr_type_node, u);
7151 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7152 TREE_SIDE_EFFECTS (t) = 1;
7153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7155 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7160 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7161 size_int (UNITS_PER_WORD * nfp));
7162 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7163 TREE_SIDE_EFFECTS (t) = 1;
7164 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7166 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7167 TREE_SIDE_EFFECTS (t) = 1;
7168 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7170 nint = crtl->args.info.arg_count[SH_ARG_INT];
7175 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7176 size_int (UNITS_PER_WORD * nint));
7177 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7178 TREE_SIDE_EFFECTS (t) = 1;
7179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7181 u = make_tree (ptr_type_node, nextarg);
7182 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7183 TREE_SIDE_EFFECTS (t) = 1;
7184 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7187 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7188 member, return it. */
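/* For example (an illustrative case): in
     struct wrapper { int : 0; double d; };
   the unnamed zero-width bit-field is skipped and the FIELD_DECL for
   `d' is returned, so the wrapper can be treated like a plain double.  */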
7190 find_sole_member (tree type)
7192 tree field, member = NULL_TREE;
7194 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7196 if (TREE_CODE (field) != FIELD_DECL)
7198 if (!DECL_SIZE (field))
7200 if (integer_zerop (DECL_SIZE (field)))
7208 /* Implement `va_arg'. */
7211 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7212 gimple_seq *post_p ATTRIBUTE_UNUSED)
7214 HOST_WIDE_INT size, rsize;
7215 tree tmp, pptr_type_node;
7216 tree addr, lab_over = NULL, result = NULL;
7217 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7221 type = build_pointer_type (type);
7223 size = int_size_in_bytes (type);
7224 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7225 pptr_type_node = build_pointer_type (ptr_type_node);
7227 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7228 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7230 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7231 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7236 f_next_o = TYPE_FIELDS (va_list_type_node);
7237 f_next_o_limit = TREE_CHAIN (f_next_o);
7238 f_next_fp = TREE_CHAIN (f_next_o_limit);
7239 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7240 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7242 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7244 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7245 valist, f_next_o_limit, NULL_TREE);
7246 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7247 valist, f_next_fp, NULL_TREE);
7248 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7249 valist, f_next_fp_limit, NULL_TREE);
7250 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7251 valist, f_next_stack, NULL_TREE);
7253 /* Structures with a single member with a distinct mode are passed
7254 like their member. This is relevant if the latter has a REAL_TYPE
7255 or COMPLEX_TYPE type. */
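/* E.g. (hypothetical user types), both
     struct s { float f; };
   and
     struct t { struct s inner; };
   are treated like a plain float here, since each level has a sole
   member with the same mode.  */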
7257 while (TREE_CODE (eff_type) == RECORD_TYPE
7258 && (member = find_sole_member (eff_type))
7259 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7260 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7261 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7263 tree field_type = TREE_TYPE (member);
7265 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7266 eff_type = field_type;
7269 gcc_assert ((TYPE_ALIGN (eff_type)
7270 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7271 || (TYPE_ALIGN (eff_type)
7272 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7277 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7279 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7280 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7281 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7286 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7289 addr = create_tmp_var (pptr_type_node, NULL);
7290 lab_false = create_artificial_label ();
7291 lab_over = create_artificial_label ();
7293 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7297 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7299 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7301 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7302 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7304 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7305 tmp = next_fp_limit;
7306 if (size > 4 && !is_double)
7307 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7308 unshare_expr (tmp), size_int (4 - size));
7309 tmp = build2 (GE_EXPR, boolean_type_node,
7310 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7311 cmp = build3 (COND_EXPR, void_type_node, tmp,
7312 build1 (GOTO_EXPR, void_type_node,
7313 unshare_expr (lab_false)), NULL_TREE);
7315 gimplify_and_add (cmp, pre_p);
7317 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7318 || (is_double || size == 16))
7320 tmp = fold_convert (sizetype, next_fp_tmp);
7321 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7322 size_int (UNITS_PER_WORD));
7323 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7324 unshare_expr (next_fp_tmp), tmp);
7325 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7328 gimplify_and_add (cmp, pre_p);
7330 #ifdef FUNCTION_ARG_SCmode_WART
7331 if (TYPE_MODE (eff_type) == SCmode
7332 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7334 tree subtype = TREE_TYPE (eff_type);
7338 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7339 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7342 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7343 real = get_initialized_tmp_var (real, pre_p, NULL);
7345 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7346 if (type != eff_type)
7347 result = build1 (VIEW_CONVERT_EXPR, type, result);
7348 result = get_initialized_tmp_var (result, pre_p, NULL);
7350 #endif /* FUNCTION_ARG_SCmode_WART */
7352 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7353 gimplify_and_add (tmp, pre_p);
7355 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7356 gimplify_and_add (tmp, pre_p);
7358 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7359 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7360 gimplify_assign (unshare_expr (next_fp_tmp),
7361 unshare_expr (valist), pre_p);
7363 gimplify_assign (unshare_expr (valist),
7364 unshare_expr (next_fp_tmp), post_p);
7365 valist = next_fp_tmp;
7369 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7370 unshare_expr (next_o), size_int (rsize));
7371 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7372 unshare_expr (next_o_limit));
7373 tmp = build3 (COND_EXPR, void_type_node, tmp,
7374 build1 (GOTO_EXPR, void_type_node,
7375 unshare_expr (lab_false)),
7377 gimplify_and_add (tmp, pre_p);
7379 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7380 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7382 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7383 gimplify_and_add (tmp, pre_p);
7385 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7386 gimplify_and_add (tmp, pre_p);
7388 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7389 gimplify_assign (unshare_expr (next_o),
7390 unshare_expr (next_o_limit), pre_p);
7392 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7393 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7398 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7399 gimplify_and_add (tmp, pre_p);
7403 /* ??? In va-sh.h, there had been code to make values larger than
7404 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7406 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7409 gimplify_assign (result, tmp, pre_p);
7411 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7412 gimplify_and_add (tmp, pre_p);
7418 result = build_va_arg_indirect_ref (result);
7424 sh_promote_prototypes (const_tree type)
7430 return ! sh_attr_renesas_p (type);
7433 /* Whether an argument must be passed by reference. On SHcompact, we
7434 pretend that arguments wider than 32 bits that would have been passed in
7435 registers are passed by reference, so that an SHmedia trampoline
7436 loads them into the full 64-bit registers. */
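/* For example (illustrative): a 64-bit integer argument that would have
   been split across two 32-bit argument registers is instead passed by
   reference, letting the trampoline load it into one 64-bit register.  */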
7439 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7440 const_tree type, bool named)
7442 unsigned HOST_WIDE_INT size;
7445 size = int_size_in_bytes (type);
7447 size = GET_MODE_SIZE (mode);
7449 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7451 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7452 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7453 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7455 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7456 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7463 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7464 const_tree type, bool named)
7466 if (targetm.calls.must_pass_in_stack (mode, type))
7469 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7470 wants to know about pass-by-reference semantics for incoming
7475 if (TARGET_SHCOMPACT)
7477 cum->byref = shcompact_byref (cum, mode, type, named);
7478 return cum->byref != 0;
7485 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7486 const_tree type, bool named ATTRIBUTE_UNUSED)
7488 /* ??? How can it possibly be correct to return true only on the
7489 caller side of the equation? Is there someplace else in the
7490 sh backend that's magically producing the copies? */
7491 return (cum->outgoing
7492 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7493 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7497 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7498 tree type, bool named ATTRIBUTE_UNUSED)
7503 && PASS_IN_REG_P (*cum, mode, type)
7504 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7505 && (ROUND_REG (*cum, mode)
7507 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7508 : ROUND_ADVANCE (int_size_in_bytes (type)))
7509 > NPARM_REGS (mode)))
7510 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7512 else if (!TARGET_SHCOMPACT
7513 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7514 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7516 return words * UNITS_PER_WORD;
7520 /* Define where to put the arguments to a function.
7521 Value is zero to push the argument on the stack,
7522 or a hard register in which to store the argument.
7524 MODE is the argument's machine mode.
7525 TYPE is the data type of the argument (as a tree).
7526 This is null for libcalls where that information may
7528 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7529 the preceding args and about the function being called.
7530 NAMED is nonzero if this argument is a named parameter
7531 (otherwise it is an extra parameter matching an ellipsis).
7533 On SH the first args are normally in registers
7534 and the rest are pushed. Any arg that starts within the first
7535 NPARM_REGS words is at least partially passed in a register unless
7536 its data type forbids. */
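/* A concrete sketch (assuming the usual SH register naming; not
   guaranteed for every subtarget): with the default ABI the first
   integer arguments go in r4..r7 and, on SH4, single precision float
   arguments go in fr4..fr11, so for
     void f (int a, float b, int c);
   A arrives in r4, B in an fr4/fr5 slot (see the endianness games
   below) and C in r5, since the integer and float argument counters
   advance independently.  */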
7540 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7541 tree type, int named)
7543 if (! TARGET_SH5 && mode == VOIDmode)
7544 return GEN_INT (ca->renesas_abi ? 1 : 0);
7547 && PASS_IN_REG_P (*ca, mode, type)
7548 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7552 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7553 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7555 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7556 gen_rtx_REG (SFmode,
7558 + (ROUND_REG (*ca, mode) ^ 1)),
7560 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7561 gen_rtx_REG (SFmode,
7563 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7565 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7568 /* If the alignment of a DF value causes an SF register to be
7569 skipped, we will use that skipped register for the next SF
7571 if ((TARGET_HITACHI || ca->renesas_abi)
7572 && ca->free_single_fp_reg
7574 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7576 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7577 ^ (mode == SFmode && TARGET_SH4
7578 && TARGET_LITTLE_ENDIAN != 0
7579 && ! TARGET_HITACHI && ! ca->renesas_abi);
7580 return gen_rtx_REG (mode, regno);
7586 if (mode == VOIDmode && TARGET_SHCOMPACT)
7587 return GEN_INT (ca->call_cookie);
7589 /* The following test assumes unnamed arguments are promoted to
7591 if (mode == SFmode && ca->free_single_fp_reg)
7592 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7594 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7595 && (named || ! ca->prototype_p)
7596 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7598 if (! ca->prototype_p && TARGET_SHMEDIA)
7599 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7601 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7603 + ca->arg_count[(int) SH_ARG_FLOAT]);
7606 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7607 && (! TARGET_SHCOMPACT
7608 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7609 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7612 return gen_rtx_REG (mode, (FIRST_PARM_REG
7613 + ca->arg_count[(int) SH_ARG_INT]));
7622 /* Update the data in CUM to advance over an argument
7623 of mode MODE and data type TYPE.
7624 (TYPE is null for libcalls where that information may not be
7628 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7629 tree type, int named)
7633 else if (TARGET_SH5)
7635 tree type2 = (ca->byref && type
7638 enum machine_mode mode2 = (ca->byref && type
7641 int dwords = ((ca->byref
7644 ? int_size_in_bytes (type2)
7645 : GET_MODE_SIZE (mode2)) + 7) / 8;
7646 int numregs = MIN (dwords, NPARM_REGS (SImode)
7647 - ca->arg_count[(int) SH_ARG_INT]);
7651 ca->arg_count[(int) SH_ARG_INT] += numregs;
7652 if (TARGET_SHCOMPACT
7653 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7656 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7658 /* N.B. We want this also for outgoing. */
7659 ca->stack_regs += numregs;
7664 ca->stack_regs += numregs;
7665 ca->byref_regs += numregs;
7669 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7673 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7676 else if (dwords > numregs)
7678 int pushregs = numregs;
7680 if (TARGET_SHCOMPACT)
7681 ca->stack_regs += numregs;
7682 while (pushregs < NPARM_REGS (SImode) - 1
7683 && (CALL_COOKIE_INT_REG_GET
7685 NPARM_REGS (SImode) - pushregs)
7689 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7693 if (numregs == NPARM_REGS (SImode))
7695 |= CALL_COOKIE_INT_REG (0, 1)
7696 | CALL_COOKIE_STACKSEQ (numregs - 1);
7699 |= CALL_COOKIE_STACKSEQ (numregs);
7702 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7703 && (named || ! ca->prototype_p))
7705 if (mode2 == SFmode && ca->free_single_fp_reg)
7706 ca->free_single_fp_reg = 0;
7707 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7708 < NPARM_REGS (SFmode))
7711 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7713 - ca->arg_count[(int) SH_ARG_FLOAT]);
7715 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7717 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7719 if (ca->outgoing && numregs > 0)
7723 |= (CALL_COOKIE_INT_REG
7724 (ca->arg_count[(int) SH_ARG_INT]
7725 - numregs + ((numfpregs - 2) / 2),
7726 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7729 while (numfpregs -= 2);
7731 else if (mode2 == SFmode && (named)
7732 && (ca->arg_count[(int) SH_ARG_FLOAT]
7733 < NPARM_REGS (SFmode)))
7734 ca->free_single_fp_reg
7735 = FIRST_FP_PARM_REG - numfpregs
7736 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7742 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7744 /* Note that we've used the skipped register. */
7745 if (mode == SFmode && ca->free_single_fp_reg)
7747 ca->free_single_fp_reg = 0;
7750 /* When we have a DF after an SF, there's an SF register that gets
7751 skipped in order to align the DF value. We note this skipped
7752 register, because the next SF value will use it, and not the
7753 SF that follows the DF. */
7755 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7757 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7758 + BASE_ARG_REG (mode));
7762 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7763 || PASS_IN_REG_P (*ca, mode, type))
7764 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7765 = (ROUND_REG (*ca, mode)
7767 ? ROUND_ADVANCE (int_size_in_bytes (type))
7768 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7771 /* The Renesas calling convention doesn't quite fit into this scheme since
7772 the address is passed like an invisible argument, but one that is always
7773 passed in memory. */
7775 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7777 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7779 return gen_rtx_REG (Pmode, 2);
7782 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7785 sh_return_in_memory (const_tree type, const_tree fndecl)
7789 if (TYPE_MODE (type) == BLKmode)
7790 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7792 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7796 return (TYPE_MODE (type) == BLKmode
7797 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7798 && TREE_CODE (type) == RECORD_TYPE));
7802 /* We actually emit the code in sh_expand_prologue. We used to use
7803 a static variable to flag that we need to emit this code, but that
7804 doesn't work when inlining, when functions are deferred and then emitted
7805 later. Fortunately, we already have two flags that are part of struct
7806 function that tell if a function uses varargs or stdarg. */
7808 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7809 enum machine_mode mode,
7811 int *pretend_arg_size,
7812 int second_time ATTRIBUTE_UNUSED)
7814 gcc_assert (cfun->stdarg);
7815 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7817 int named_parm_regs, anon_parm_regs;
7819 named_parm_regs = (ROUND_REG (*ca, mode)
7821 ? ROUND_ADVANCE (int_size_in_bytes (type))
7822 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7823 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7824 if (anon_parm_regs > 0)
7825 *pretend_arg_size = anon_parm_regs * 4;
7830 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7836 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7838 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7842 /* Define the offset between two registers, one to be eliminated, and
7843 the other its replacement, at the start of a routine. */
7846 initial_elimination_offset (int from, int to)
7849 int regs_saved_rounding = 0;
7850 int total_saved_regs_space;
7851 int total_auto_space;
7852 int save_flags = target_flags;
7854 HARD_REG_SET live_regs_mask;
7856 shmedia_space_reserved_for_target_registers = false;
7857 regs_saved = calc_live_regs (&live_regs_mask);
7858 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7860 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7862 shmedia_space_reserved_for_target_registers = true;
7863 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7866 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7867 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7868 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7870 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7871 copy_flags = target_flags;
7872 target_flags = save_flags;
7874 total_saved_regs_space = regs_saved + regs_saved_rounding;
7876 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7877 return total_saved_regs_space + total_auto_space
7878 + crtl->args.info.byref_regs * 8;
7880 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7881 return total_saved_regs_space + total_auto_space
7882 + crtl->args.info.byref_regs * 8;
7884 /* Initial gap between fp and sp is 0. */
7885 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7888 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7889 return rounded_frame_size (0);
7891 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7892 return rounded_frame_size (0);
7894 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7895 && (to == HARD_FRAME_POINTER_REGNUM
7896 || to == STACK_POINTER_REGNUM));
7899 int n = total_saved_regs_space;
7900 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7901 save_schedule schedule;
7904 n += total_auto_space;
7906 /* If it wasn't saved, there's not much we can do. */
7907 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7910 target_flags = copy_flags;
7912 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7913 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7914 if (entry->reg == pr_reg)
7916 target_flags = save_flags;
7917 return entry->offset;
7922 return total_auto_space;
7925 /* Parse the -mfixed-range= option string. */
7927 sh_fix_range (const char *const_str)
7930 char *str, *dash, *comma;
7932 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
7933 REG2 are either register names or register numbers. The effect
7934 of this option is to mark the registers in the range from REG1 to
7935 REG2 as ``fixed'' so they won't be used by the compiler. */
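/* For example (an illustrative invocation):
     -mfixed-range=r8-r10,r13-r13
   marks r8, r9, r10 and r13 as fixed and call-used.  */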
7937 i = strlen (const_str);
7938 str = (char *) alloca (i + 1);
7939 memcpy (str, const_str, i + 1);
7943 dash = strchr (str, '-');
7946 warning (0, "value of -mfixed-range must have form REG1-REG2");
7950 comma = strchr (dash + 1, ',');
7954 first = decode_reg_name (str);
7957 warning (0, "unknown register name: %s", str);
7961 last = decode_reg_name (dash + 1);
7964 warning (0, "unknown register name: %s", dash + 1);
7972 warning (0, "%s-%s is an empty range", str, dash + 1);
7976 for (i = first; i <= last; ++i)
7977 fixed_regs[i] = call_used_regs[i] = 1;
7987 /* Insert any deferred function attributes from earlier pragmas. */
7989 sh_insert_attributes (tree node, tree *attributes)
7993 if (TREE_CODE (node) != FUNCTION_DECL)
7996 /* We are only interested in fields. */
8000 /* Append the attributes to the deferred attributes. */
8001 *sh_deferred_function_attributes_tail = *attributes;
8002 attrs = sh_deferred_function_attributes;
8006 /* Some attributes imply or require the interrupt attribute. */
8007 if (!lookup_attribute ("interrupt_handler", attrs)
8008 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8010 /* If we have a trapa_handler, but no interrupt_handler attribute,
8011 insert an interrupt_handler attribute. */
8012 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8013 /* We can't use sh_pr_interrupt here because that's not in the
8016 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8017 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8018 if the interrupt attribute is missing, we ignore the attribute
8020 else if (lookup_attribute ("sp_switch", attrs)
8021 || lookup_attribute ("trap_exit", attrs)
8022 || lookup_attribute ("nosave_low_regs", attrs)
8023 || lookup_attribute ("resbank", attrs))
8027 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8029 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8030 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8031 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8032 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8033 warning (OPT_Wattributes,
8034 "%qs attribute only applies to interrupt functions",
8035 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8038 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8040 tail = &TREE_CHAIN (*tail);
8043 attrs = *attributes;
8047 /* Install the processed list. */
8048 *attributes = attrs;
8050 /* Clear deferred attributes. */
8051 sh_deferred_function_attributes = NULL_TREE;
8052 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8057 /* Supported attributes:
8059 interrupt_handler -- specifies this function is an interrupt handler.
8061 trapa_handler -- like above, but don't save all registers.
8063 sp_switch -- specifies an alternate stack for an interrupt handler
8066 trap_exit -- use a trapa to exit an interrupt function instead of
8069 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8070 This is useful on the SH3 and upwards,
8071 which have a separate set of low regs for User and Supervisor modes.
8072 This should only be used for the lowest level of interrupts. Higher levels
8073 of interrupts must save the registers in case they themselves are
8076 renesas -- use Renesas calling/layout conventions (functions and
8079 resbank -- In case of an ISR, use a register bank to save registers
8080 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
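   Example declarations (an illustrative sketch; the function names are
   hypothetical):

     void handler (void) __attribute__ ((interrupt_handler));
     void handler2 (void) __attribute__ ((interrupt_handler,
                                          sp_switch ("alt_stack"),
                                          trap_exit (11)));
     void handler3 (void) __attribute__ ((interrupt_handler, resbank));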
8083 const struct attribute_spec sh_attribute_table[] =
8085 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8086 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8087 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8088 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8089 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8090 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8091 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8092 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8093 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8095 /* Symbian support adds three new attributes:
8096 dllexport - for exporting a function/variable that will live in a dll
8097 dllimport - for importing a function/variable from a dll
8099 Microsoft allows multiple declspecs in one __declspec, separating
8100 them with spaces. We do NOT support this. Instead, use __declspec
8102 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8103 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8105 { NULL, 0, 0, false, false, false, NULL }
8108 /* Handle a 'resbank' attribute. */
8110 sh_handle_resbank_handler_attribute (tree * node, tree name,
8111 tree args ATTRIBUTE_UNUSED,
8112 int flags ATTRIBUTE_UNUSED,
8113 bool * no_add_attrs)
8117 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8118 IDENTIFIER_POINTER (name));
8119 *no_add_attrs = true;
8121 if (TREE_CODE (*node) != FUNCTION_DECL)
8123 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8124 IDENTIFIER_POINTER (name));
8125 *no_add_attrs = true;
8131 /* Handle an "interrupt_handler" attribute; arguments as in
8132 struct attribute_spec.handler. */
8134 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8135 tree args ATTRIBUTE_UNUSED,
8136 int flags ATTRIBUTE_UNUSED,
8139 if (TREE_CODE (*node) != FUNCTION_DECL)
8141 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8142 IDENTIFIER_POINTER (name));
8143 *no_add_attrs = true;
8145 else if (TARGET_SHCOMPACT)
8147 error ("attribute interrupt_handler is not compatible with -m5-compact");
8148 *no_add_attrs = true;
8154 /* Handle a 'function_vector' attribute; arguments as in
8155 struct attribute_spec.handler. */
8157 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8158 tree args ATTRIBUTE_UNUSED,
8159 int flags ATTRIBUTE_UNUSED,
8160 bool * no_add_attrs)
8164 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8165 IDENTIFIER_POINTER (name));
8166 *no_add_attrs = true;
8168 else if (TREE_CODE (*node) != FUNCTION_DECL)
8170 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8171 IDENTIFIER_POINTER (name));
8172 *no_add_attrs = true;
8174 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8176 /* The argument must be a constant integer. */
8177 warning (OPT_Wattributes,
8178 "`%s' attribute argument not an integer constant",
8179 IDENTIFIER_POINTER (name));
8180 *no_add_attrs = true;
8182 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8184 /* The argument value must be between 0 and 255. */
8185 warning (OPT_Wattributes,
8186 "`%s' attribute argument should be between 0 and 255",
8187 IDENTIFIER_POINTER (name));
8188 *no_add_attrs = true;
8193 /* Returns 1 if X is a SYMBOL_REF for a function that has been
8194 assigned the attribute 'function_vector'. */
8196 sh2a_is_function_vector_call (rtx x)
8198 if (GET_CODE (x) == SYMBOL_REF
8199 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8201 tree tr = SYMBOL_REF_DECL (x);
8203 if (sh2a_function_vector_p (tr))
8210 /* Returns the function vector number, if the attribute
8211 'function_vector' is assigned, otherwise returns zero. */
8213 sh2a_get_function_vector_number (rtx x)
8218 if ((GET_CODE (x) == SYMBOL_REF)
8219 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8221 t = SYMBOL_REF_DECL (x);
8223 if (TREE_CODE (t) != FUNCTION_DECL)
8226 list = SH_ATTRIBUTES (t);
8229 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8231 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8235 list = TREE_CHAIN (list);
8244 /* Handle an "sp_switch" attribute; arguments as in
8245 struct attribute_spec.handler. */
8247 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8248 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8250 if (TREE_CODE (*node) != FUNCTION_DECL)
8252 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8253 IDENTIFIER_POINTER (name));
8254 *no_add_attrs = true;
8256 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8258 /* The argument must be a constant string. */
8259 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8260 IDENTIFIER_POINTER (name));
8261 *no_add_attrs = true;
8267 /* Handle a "trap_exit" attribute; arguments as in
8268 struct attribute_spec.handler. */
8270 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8271 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8273 if (TREE_CODE (*node) != FUNCTION_DECL)
8275 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8276 IDENTIFIER_POINTER (name));
8277 *no_add_attrs = true;
8279 /* The argument specifies a trap number to be used in a trapa instruction
8280 at function exit (instead of an rte instruction). */
8281 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8283 /* The argument must be a constant integer. */
8284 warning (OPT_Wattributes, "%qs attribute argument not an "
8285 "integer constant", IDENTIFIER_POINTER (name));
8286 *no_add_attrs = true;
8293 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8294 tree name ATTRIBUTE_UNUSED,
8295 tree args ATTRIBUTE_UNUSED,
8296 int flags ATTRIBUTE_UNUSED,
8297 bool *no_add_attrs ATTRIBUTE_UNUSED)
8302 /* True if __attribute__((renesas)) or -mrenesas. */
8304 sh_attr_renesas_p (const_tree td)
8311 td = TREE_TYPE (td);
8312 if (td == error_mark_node)
8314 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8318 /* True if __attribute__((renesas)) or -mrenesas, for the current
8321 sh_cfun_attr_renesas_p (void)
8323 return sh_attr_renesas_p (current_function_decl);
8327 sh_cfun_interrupt_handler_p (void)
8329 return (lookup_attribute ("interrupt_handler",
8330 DECL_ATTRIBUTES (current_function_decl))
8334 /* Returns 1 if FUNC has been assigned the attribute
8335 "function_vector". */
8337 sh2a_function_vector_p (tree func)
8340 if (TREE_CODE (func) != FUNCTION_DECL)
8343 list = SH_ATTRIBUTES (func);
8346 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8349 list = TREE_CHAIN (list);
8354 /* Returns TRUE if the given tree has the "resbank" attribute. */
8357 sh_cfun_resbank_handler_p (void)
8359 return ((lookup_attribute ("resbank",
8360 DECL_ATTRIBUTES (current_function_decl))
8362 && (lookup_attribute ("interrupt_handler",
8363 DECL_ATTRIBUTES (current_function_decl))
8364 != NULL_TREE) && TARGET_SH2A);
8367 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8370 sh_check_pch_target_flags (int old_flags)
8372 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8373 | MASK_SH_E | MASK_HARD_SH4
8374 | MASK_FPU_SINGLE | MASK_SH4))
8375 return _("created and used with different architectures / ABIs");
8376 if ((old_flags ^ target_flags) & MASK_HITACHI)
8377 return _("created and used with different ABIs");
8378 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8379 return _("created and used with different endianness");
8383 /* Predicates used by the templates. */
8385 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8386 Used only in general_movsrc_operand. */
8389 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8401 /* Nonzero if OP is a floating point value with value 0.0. */
8404 fp_zero_operand (rtx op)
8408 if (GET_MODE (op) != SFmode)
8411 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8412 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8415 /* Nonzero if OP is a floating point value with value 1.0. */
8418 fp_one_operand (rtx op)
8422 if (GET_MODE (op) != SFmode)
8425 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8426 return REAL_VALUES_EQUAL (r, dconst1);
8429 /* For -m4 and -m4-single-only, mode switching is used. If we are
8430 compiling without -mfmovd, movsf_ie isn't taken into account for
8431 mode switching. We could check in machine_dependent_reorg for
8432 cases where we know we are in single precision mode, but there is no
8433 interface to find that out during reload, so we must avoid
8434 choosing an fldi alternative during reload and thus failing to
8435 allocate a scratch register for the constant loading. */
8439 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8443 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8445 enum rtx_code code = GET_CODE (op);
8446 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8449 /* Return the TLS type for TLS symbols, 0 otherwise. */
8451 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8453 if (GET_CODE (op) != SYMBOL_REF)
8455 return SYMBOL_REF_TLS_MODEL (op);
8458 /* Return the destination address of a branch. */
8461 branch_dest (rtx branch)
8463 rtx dest = SET_SRC (PATTERN (branch));
8466 if (GET_CODE (dest) == IF_THEN_ELSE)
8467 dest = XEXP (dest, 1);
8468 dest = XEXP (dest, 0);
8469 dest_uid = INSN_UID (dest);
8470 return INSN_ADDRESSES (dest_uid);
8473 /* Return nonzero if REG is not used after INSN.
8474 We assume REG is a reload reg, and therefore does
8475 not live past labels. It may live past calls or jumps though. */
8477 reg_unused_after (rtx reg, rtx insn)
8482 /* If the reg is set by this instruction, then it is safe for our
8483 case. Disregard the case where this is a store to memory, since
8484 we are checking a register used in the store address. */
8485 set = single_set (insn);
8486 if (set && GET_CODE (SET_DEST (set)) != MEM
8487 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8490 while ((insn = NEXT_INSN (insn)))
8496 code = GET_CODE (insn);
8499 /* If this is a label that existed before reload, then the register
8500 is dead here. However, if this is a label added by reorg, then
8501 the register may still be live here. We can't tell the difference,
8502 so we just ignore labels completely. */
8503 if (code == CODE_LABEL)
8508 if (code == JUMP_INSN)
8511 /* If this is a sequence, we must handle them all at once.
8512 We could have for instance a call that sets the target register,
8513 and an insn in a delay slot that uses the register. In this case,
8514 we must return 0. */
8515 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8520 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8522 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8523 rtx set = single_set (this_insn);
8525 if (GET_CODE (this_insn) == CALL_INSN)
8527 else if (GET_CODE (this_insn) == JUMP_INSN)
8529 if (INSN_ANNULLED_BRANCH_P (this_insn))
8534 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8536 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8538 if (GET_CODE (SET_DEST (set)) != MEM)
8544 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8549 else if (code == JUMP_INSN)
8553 set = single_set (insn);
8554 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8556 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8557 return GET_CODE (SET_DEST (set)) != MEM;
8558 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8561 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8569 static GTY(()) rtx fpscr_rtx;
8571 get_fpscr_rtx (void)
8575 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8576 REG_USERVAR_P (fpscr_rtx) = 1;
8577 mark_user_reg (fpscr_rtx);
8579 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8580 mark_user_reg (fpscr_rtx);
8584 static GTY(()) tree fpscr_values;
8587 emit_fpu_switch (rtx scratch, int index)
8591 if (fpscr_values == NULL)
8595 t = build_index_type (integer_one_node);
8596 t = build_array_type (integer_type_node, t);
8597 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8598 DECL_ARTIFICIAL (t) = 1;
8599 DECL_IGNORED_P (t) = 1;
8600 DECL_EXTERNAL (t) = 1;
8601 TREE_STATIC (t) = 1;
8602 TREE_PUBLIC (t) = 1;
8608 src = DECL_RTL (fpscr_values);
8609 if (!can_create_pseudo_p ())
8611 emit_move_insn (scratch, XEXP (src, 0));
8613 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8614 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8617 src = adjust_address (src, PSImode, index * 4);
8619 dst = get_fpscr_rtx ();
8620 emit_move_insn (dst, src);
8624 emit_sf_insn (rtx pat)
8630 emit_df_insn (rtx pat)
8636 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8638 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8642 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8644 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8649 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8651 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8655 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8657 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8661 static rtx get_free_reg (HARD_REG_SET);
8663 /* This function returns a register to use for loading the address from
8664 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8665 able to use pseudo registers after combine, or have a better mechanism
8666 for choosing a register, it should be done here. */
8667 /* REGS_LIVE is the liveness information for the point for which we
8668 need this allocation. In some bare-bones exit blocks, r1 is live at the
8669 start. We can even have all of r0..r3 being live:
8670 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8671 INSN before which new insns are placed will clobber the register
8672 we return. If a basic block consists only of setting the return value
8673 register to a pseudo and using that register, the return value is not
8674 live before or after this block, yet we'll insert our insns right in
8678 get_free_reg (HARD_REG_SET regs_live)
8680 if (! TEST_HARD_REG_BIT (regs_live, 1))
8681 return gen_rtx_REG (Pmode, 1);
8683 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8684 there shouldn't be anything but a jump before the function end. */
8685 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8686 return gen_rtx_REG (Pmode, 7);
8689 /* This function will set the fpscr from memory.
8690 MODE is the mode we are setting it to. */
8692 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8694 enum attr_fp_mode fp_mode = mode;
8695 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8698 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8699 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8702 /* Is the given character a logical line separator for the assembler? */
8703 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8704 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8708 sh_insn_length_adjustment (rtx insn)
8710 /* Instructions with unfilled delay slots take up an extra two bytes for
8711 the nop in the delay slot. */
8712 if (((GET_CODE (insn) == INSN
8713 && GET_CODE (PATTERN (insn)) != USE
8714 && GET_CODE (PATTERN (insn)) != CLOBBER)
8715 || GET_CODE (insn) == CALL_INSN
8716 || (GET_CODE (insn) == JUMP_INSN
8717 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8718 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8719 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8720 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8723 /* SH2e has a bug that prevents the use of annulled branches, so if
8724 the delay slot is not filled, we'll have to put a NOP in it. */
8725 if (sh_cpu == CPU_SH2E
8726 && GET_CODE (insn) == JUMP_INSN
8727 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8728 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8729 && get_attr_type (insn) == TYPE_CBRANCH
8730 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8733 /* sh-dsp parallel processing insns take four bytes instead of two. */
8735 if (GET_CODE (insn) == INSN)
8738 rtx body = PATTERN (insn);
8741 int maybe_label = 1;
8743 if (GET_CODE (body) == ASM_INPUT)
8744 templ = XSTR (body, 0);
8745 else if (asm_noperands (body) >= 0)
8747 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8756 while (c == ' ' || c == '\t');
8757 /* All sh-dsp parallel-processing insns start with p.
8758 The only non-ppi sh insn starting with p is pref.
8759 The only ppi starting with pr is prnd. */
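/* Illustrative example (not from the original source): under these rules,
   an asm template "padd x0,y0,a0" starts with 'p' but not "pre", so it
   counts as a 4-byte parallel-processing insn; "pref @r1" is exempted by
   the "re" check below, while "prnd a0,x0" still counts.  */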
8760 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8762 /* The repeat pseudo-insn expands to three insns, a total of
8763 six bytes in size. */
8764 else if ((c == 'r' || c == 'R')
8765 && ! strncasecmp ("epeat", templ, 5))
8767 while (c && c != '\n'
8768 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8770 /* If this is a label, it is obviously not a ppi insn. */
8771 if (c == ':' && maybe_label)
8776 else if (c == '\'' || c == '"')
8781 maybe_label = c != ':';
8789 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8790 isn't protected by a PIC unspec. */
8792 nonpic_symbol_mentioned_p (rtx x)
8794 register const char *fmt;
8797 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8798 || GET_CODE (x) == PC)
8801 /* We don't want to look into the possible MEM location of a
8802 CONST_DOUBLE, since we're not going to use it, in general. */
8803 if (GET_CODE (x) == CONST_DOUBLE)
8806 if (GET_CODE (x) == UNSPEC
8807 && (XINT (x, 1) == UNSPEC_PIC
8808 || XINT (x, 1) == UNSPEC_GOT
8809 || XINT (x, 1) == UNSPEC_GOTOFF
8810 || XINT (x, 1) == UNSPEC_GOTPLT
8811 || XINT (x, 1) == UNSPEC_GOTTPOFF
8812 || XINT (x, 1) == UNSPEC_DTPOFF
8813 || XINT (x, 1) == UNSPEC_PLT
8814 || XINT (x, 1) == UNSPEC_SYMOFF
8815 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
8818 fmt = GET_RTX_FORMAT (GET_CODE (x));
8819 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8825 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8826 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8829 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8836 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8837 @GOTOFF in `reg'. */
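/* Sketch of the intent (illustration only): for a label or a local
   symbol, the code below emits a symGOTOFF2reg sequence, i.e. the
   symbol's offset from the GOT base; for any other SYMBOL_REF it emits
   symGOT2reg, a load of the address from the symbol's GOT slot.  Both
   wrap the relocation in an UNSPEC, which is why
   nonpic_symbol_mentioned_p above treats those unspecs as safe.  */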
8839 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8842 if (tls_symbolic_operand (orig, Pmode))
8845 if (GET_CODE (orig) == LABEL_REF
8846 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8849 reg = gen_reg_rtx (Pmode);
8851 emit_insn (gen_symGOTOFF2reg (reg, orig));
8854 else if (GET_CODE (orig) == SYMBOL_REF)
8857 reg = gen_reg_rtx (Pmode);
8859 emit_insn (gen_symGOT2reg (reg, orig));
8865 /* Mark the use of a constant in the literal table. If the constant
8866 has multiple labels, make it unique. */
8868 mark_constant_pool_use (rtx x)
8870 rtx insn, lab, pattern;
8875 switch (GET_CODE (x))
8885 /* Get the first label in the list of labels for the same constant
8886 and delete the other labels in the list. */
8888 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8890 if (GET_CODE (insn) != CODE_LABEL
8891 || LABEL_REFS (insn) != NEXT_INSN (insn))
8896 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8897 INSN_DELETED_P (insn) = 1;
8899 /* Mark constants in a window. */
8900 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8902 if (GET_CODE (insn) != INSN)
8905 pattern = PATTERN (insn);
8906 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8909 switch (XINT (pattern, 1))
8911 case UNSPECV_CONST2:
8912 case UNSPECV_CONST4:
8913 case UNSPECV_CONST8:
8914 XVECEXP (pattern, 0, 1) = const1_rtx;
8916 case UNSPECV_WINDOW_END:
8917 if (XVECEXP (pattern, 0, 0) == x)
8920 case UNSPECV_CONST_END:
8930 /* Return true if it's possible to redirect BRANCH1 to the destination
8931 of an unconditional jump BRANCH2. We only want to do this if the
8932 resulting branch will have a short displacement. */
8934 sh_can_redirect_branch (rtx branch1, rtx branch2)
8936 if (flag_expensive_optimizations && simplejump_p (branch2))
8938 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8942 for (distance = 0, insn = NEXT_INSN (branch1);
8943 insn && distance < 256;
8944 insn = PREV_INSN (insn))
8949 distance += get_attr_length (insn);
8951 for (distance = 0, insn = NEXT_INSN (branch1);
8952 insn && distance < 256;
8953 insn = NEXT_INSN (insn))
8958 distance += get_attr_length (insn);
8964 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8966 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8967 unsigned int new_reg)
8969 /* Interrupt functions can only use registers that have already been
8970 saved by the prologue, even if they would normally be
8973 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8979 /* Function to update the integer COST
8980 based on the relationship between INSN that is dependent on
8981 DEP_INSN through the dependence LINK. The default is to make no
8982 adjustment to COST. This can be used for example to specify to
8983 the scheduler that an output- or anti-dependence does not incur
8984 the same cost as a data-dependence. The return value should be
8985 the new value for COST. */
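/* Example (illustration, not from the original source): on SHmedia an
   output dependence between two insns that set the same register
   arrives here with REG_NOTE_KIND (link) != 0 and is given no cost,
   whereas a true data dependence (REG_NOTE_KIND (link) == 0) keeps the
   latency from the DFA pipeline description, possibly adjusted below.  */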
8987 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8993 /* On SHmedia, if the dependence is an anti-dependence or
8994 output-dependence, there is no cost. */
8995 if (REG_NOTE_KIND (link) != 0)
8997 /* However, dependencies between target register loads and
8998 uses of the register in a subsequent block that are separated
8999 by a conditional branch are not modelled - we have to make do with
9000 the anti-dependency between the target register load and the
9001 conditional branch that ends the current block. */
9002 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9003 && GET_CODE (PATTERN (dep_insn)) == SET
9004 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9005 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9006 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9008 int orig_cost = cost;
9009 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9010 rtx target = ((! note
9011 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9012 ? insn : JUMP_LABEL (insn));
9013 /* On the likely path, the branch costs 1, on the unlikely path,
9017 target = next_active_insn (target);
9018 while (target && ! flow_dependent_p (target, dep_insn)
9020 /* If two branches are executed in immediate succession, with the
9021 first branch properly predicted, this causes a stall at the
9022 second branch, hence we won't need the target for the
9023 second branch for two cycles after the launch of the first
9025 if (cost > orig_cost - 2)
9026 cost = orig_cost - 2;
9032 else if (get_attr_is_mac_media (insn)
9033 && get_attr_is_mac_media (dep_insn))
9036 else if (! reload_completed
9037 && GET_CODE (PATTERN (insn)) == SET
9038 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9039 && GET_CODE (PATTERN (dep_insn)) == SET
9040 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9043 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9044 that is needed at the target. */
9045 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9046 && ! flow_dependent_p (insn, dep_insn))
9049 else if (REG_NOTE_KIND (link) == 0)
9051 enum attr_type type;
9054 if (recog_memoized (insn) < 0
9055 || recog_memoized (dep_insn) < 0)
9058 dep_set = single_set (dep_insn);
9060 /* The latency that we specify in the scheduling description refers
9061 to the actual output, not to an auto-increment register; for that,
9062 the latency is one. */
9063 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9065 rtx set = single_set (insn);
9068 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9069 && (!MEM_P (SET_DEST (set))
9070 || !reg_mentioned_p (SET_DEST (dep_set),
9071 XEXP (SET_DEST (set), 0))))
9074 /* The only input for a call that is timing-critical is the
9075 function's address. */
9076 if (GET_CODE (insn) == CALL_INSN)
9078 rtx call = PATTERN (insn);
9080 if (GET_CODE (call) == PARALLEL)
9081 call = XVECEXP (call, 0, 0);
9082 if (GET_CODE (call) == SET)
9083 call = SET_SRC (call);
9084 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9085 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9086 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9087 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9088 cost -= TARGET_SH4_300 ? 3 : 6;
9090 /* Likewise, the most timing critical input for an sfuncs call
9091 is the function address. However, sfuncs typically start
9092 using their arguments pretty quickly.
9093 Assume a four cycle delay for SH4 before they are needed.
9094 Cached ST40-300 calls are quicker, so assume only a one
9096 ??? Maybe we should encode the delays till input registers
9097 are needed by sfuncs into the sfunc call insn. */
9098 /* All sfunc calls are parallels with at least four components.
9099 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9100 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9101 && XVECLEN (PATTERN (insn), 0) >= 4
9102 && (reg = sfunc_uses_reg (insn)))
9104 if (! reg_set_p (reg, dep_insn))
9105 cost -= TARGET_SH4_300 ? 1 : 4;
9107 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9109 enum attr_type dep_type = get_attr_type (dep_insn);
9111 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9113 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9114 && (type = get_attr_type (insn)) != TYPE_CALL
9115 && type != TYPE_SFUNC)
9117 /* When the preceding instruction loads the shift amount of
9118 the following SHAD/SHLD, the latency of the load is increased
9120 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9121 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9122 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9123 XEXP (SET_SRC (single_set (insn)),
9126 /* When an LS group instruction with a latency of less than
9127 3 cycles is followed by a double-precision floating-point
9128 instruction, FIPR, or FTRV, the latency of the first
9129 instruction is increased to 3 cycles. */
9131 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9132 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9134 /* The lsw register of a double-precision computation is ready one
9136 else if (reload_completed
9137 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9138 && (use_pat = single_set (insn))
9139 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9143 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9144 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9147 else if (TARGET_SH4_300)
9149 /* Stores need their input register two cycles later. */
9150 if (dep_set && cost >= 1
9151 && ((type = get_attr_type (insn)) == TYPE_STORE
9152 || type == TYPE_PSTORE
9153 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9155 rtx set = single_set (insn);
9157 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9158 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9161 /* But don't reduce the cost below 1 if the address depends
9162 on a side effect of dep_insn. */
9164 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9170 /* An anti-dependence penalty of two applies if the first insn is a double
9171 precision fadd / fsub / fmul. */
9172 else if (!TARGET_SH4_300
9173 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9174 && recog_memoized (dep_insn) >= 0
9175 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9176 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9177 /* A lot of alleged anti-flow dependences are fake,
9178 so check this one is real. */
9179 && flow_dependent_p (dep_insn, insn))
9185 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9186 if DEP_INSN is anti-flow dependent on INSN. */
9188 flow_dependent_p (rtx insn, rtx dep_insn)
9190 rtx tmp = PATTERN (insn);
9192 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9193 return tmp == NULL_RTX;
9196 /* A helper function for flow_dependent_p called through note_stores. */
9198 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9200 rtx * pinsn = (rtx *) data;
9202 if (*pinsn && reg_referenced_p (x, *pinsn))
9206 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9207 'special function' patterns (type sfunc) that clobber pr, but that
9208 do not look like function calls to leaf_function_p. Hence we must
9209 do this extra check. */
9213 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9216 /* Return where to allocate pseudo for a given hard register initial
9219 sh_allocate_initial_value (rtx hard_reg)
9223 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9225 if (current_function_is_leaf
9226 && ! sh_pr_n_sets ()
9227 && ! (TARGET_SHCOMPACT
9228 && ((crtl->args.info.call_cookie
9229 & ~ CALL_COOKIE_RET_TRAMP (1))
9230 || crtl->saves_all_registers)))
9233 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9241 /* This function returns "2" to indicate dual issue for the SH4
9242 processor. To be used by the DFA pipeline description. */
9244 sh_issue_rate (void)
9246 if (TARGET_SUPERSCALAR)
9252 /* Functions for ready queue reordering for sched1. */
9254 /* Get weight for mode for a set x. */
9256 find_set_regmode_weight (rtx x, enum machine_mode mode)
9258 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9260 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9262 if (GET_CODE (SET_DEST (x)) == REG)
9264 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9274 /* Get regmode weight for insn. */
9276 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9278 short reg_weight = 0;
9281 /* Increment weight for each register born here. */
9283 reg_weight += find_set_regmode_weight (x, mode);
9284 if (GET_CODE (x) == PARALLEL)
9287 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9289 x = XVECEXP (PATTERN (insn), 0, j);
9290 reg_weight += find_set_regmode_weight (x, mode);
9293 /* Decrement weight for each register that dies here. */
9294 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9296 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9298 rtx note = XEXP (x, 0);
9299 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9306 /* Calculate regmode weights for all insns of a basic block. */
9308 find_regmode_weight (basic_block b, enum machine_mode mode)
9310 rtx insn, next_tail, head, tail;
9312 get_ebb_head_tail (b, b, &head, &tail);
9313 next_tail = NEXT_INSN (tail);
9315 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9317 /* Handle register life information. */
9322 INSN_REGMODE_WEIGHT (insn, mode) =
9323 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9324 else if (mode == SImode)
9325 INSN_REGMODE_WEIGHT (insn, mode) =
9326 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9330 /* Comparison function for ready queue sorting. */
9332 rank_for_reorder (const void *x, const void *y)
9334 rtx tmp = *(const rtx *) y;
9335 rtx tmp2 = *(const rtx *) x;
9337 /* The insn in a schedule group should be issued first. */
9338 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9339 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9341 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9342 minimizes instruction movement, thus minimizing sched's effect on
9343 register pressure. */
9344 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9347 /* Resort the array A in which only element at index N may be out of order. */
9349 swap_reorder (rtx *a, int n)
9351 rtx insn = a[n - 1];
9354 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9362 #define SCHED_REORDER(READY, N_READY) \
9365 if ((N_READY) == 2) \
9366 swap_reorder (READY, N_READY); \
9367 else if ((N_READY) > 2) \
9368 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9372 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9375 ready_reorder (rtx *ready, int nready)
9377 SCHED_REORDER (ready, nready);
9380 /* Count life regions of r0 for a block. */
9382 find_r0_life_regions (basic_block b)
9391 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9404 r0_reg = gen_rtx_REG (SImode, R0_REG);
9409 if (find_regno_note (insn, REG_DEAD, R0_REG))
9415 && (pset = single_set (insn))
9416 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9417 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9425 insn = NEXT_INSN (insn);
9430 /* Calculate regmode weights for all insns of all basic blocks. */
9432 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9433 int verbose ATTRIBUTE_UNUSED,
9438 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9439 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9440 r0_life_regions = 0;
9442 FOR_EACH_BB_REVERSE (b)
9444 find_regmode_weight (b, SImode);
9445 find_regmode_weight (b, SFmode);
9446 if (!reload_completed)
9447 r0_life_regions += find_r0_life_regions (b);
9450 CURR_REGMODE_PRESSURE (SImode) = 0;
9451 CURR_REGMODE_PRESSURE (SFmode) = 0;
9457 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9458 int verbose ATTRIBUTE_UNUSED)
9460 if (regmode_weight[0])
9462 free (regmode_weight[0]);
9463 regmode_weight[0] = NULL;
9465 if (regmode_weight[1])
9467 free (regmode_weight[1]);
9468 regmode_weight[1] = NULL;
9472 /* The set of scalar modes supported differs from the default version in
9473 its handling of TImode for 32-bit SHMEDIA. */
9475 sh_scalar_mode_supported_p (enum machine_mode mode)
9477 if (TARGET_SHMEDIA32 && mode == TImode)
9480 return default_scalar_mode_supported_p (mode);
9483 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9484 keep count of register pressures on SImode and SFmode. */
9486 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9487 int sched_verbose ATTRIBUTE_UNUSED,
9491 if (GET_CODE (PATTERN (insn)) != USE
9492 && GET_CODE (PATTERN (insn)) != CLOBBER)
9493 cached_can_issue_more = can_issue_more - 1;
9495 cached_can_issue_more = can_issue_more;
9497 if (reload_completed)
9498 return cached_can_issue_more;
9500 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9501 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9503 return cached_can_issue_more;
9507 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9508 int verbose ATTRIBUTE_UNUSED,
9509 int veclen ATTRIBUTE_UNUSED)
9511 CURR_REGMODE_PRESSURE (SImode) = 0;
9512 CURR_REGMODE_PRESSURE (SFmode) = 0;
9515 /* Some magic numbers. */
9516 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9517 functions that already have high pressure on r0. */
9518 #define R0_MAX_LIFE_REGIONS 2
9519 /* Register Pressure thresholds for SImode and SFmode registers. */
9520 #define SIMODE_MAX_WEIGHT 5
9521 #define SFMODE_MAX_WEIGHT 10
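/* Worked example (illustration): if the insns issued so far have pushed
   CURR_REGMODE_PRESSURE (SImode) to 6, that exceeds SIMODE_MAX_WEIGHT
   (5), so high_pressure (SImode) below reports pressure and sh_reorder
   re-sorts the ready list back toward original program order via
   rank_for_reorder.  Independently, r0_life_regions >=
   R0_MAX_LIFE_REGIONS (2) makes high_pressure report pressure for any
   mode.  */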
9523 /* Return true if the pressure is high for MODE. */
9525 high_pressure (enum machine_mode mode)
9527 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9528 functions that already have high pressure on r0. */
9529 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9533 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9535 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9538 /* Reorder ready queue if register pressure is high. */
9540 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9541 int sched_verbose ATTRIBUTE_UNUSED,
9544 int clock_var ATTRIBUTE_UNUSED)
9546 if (reload_completed)
9547 return sh_issue_rate ();
9549 if (high_pressure (SFmode) || high_pressure (SImode))
9551 ready_reorder (ready, *n_readyp);
9554 return sh_issue_rate ();
9557 /* Skip cycles if the current register pressure is high. */
9559 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9560 int sched_verbose ATTRIBUTE_UNUSED,
9561 rtx *ready ATTRIBUTE_UNUSED,
9562 int *n_readyp ATTRIBUTE_UNUSED,
9563 int clock_var ATTRIBUTE_UNUSED)
9565 if (reload_completed)
9566 return cached_can_issue_more;
9568 if (high_pressure (SFmode) || high_pressure (SImode))
9571 return cached_can_issue_more;
9574 /* Skip cycles without sorting the ready queue. This will move insns from
9575 Q->R. If this is the last cycle we are skipping, allow sorting of the
9576 ready queue by sh_reorder. */
9578 /* Generally, skipping this many cycles is sufficient for all insns to move
9583 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9584 int sched_verbose ATTRIBUTE_UNUSED,
9585 rtx insn ATTRIBUTE_UNUSED,
9590 if (reload_completed)
9595 if ((clock_var - last_clock_var) < MAX_SKIPS)
9600 /* If this is the last cycle we are skipping, allow reordering of R. */
9601 if ((clock_var - last_clock_var) == MAX_SKIPS)
9613 /* SHmedia requires registers for branches, so we can't generate new
9614 branches past reload. */
9616 sh_cannot_modify_jumps_p (void)
9618 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9622 sh_target_reg_class (void)
9624 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9628 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9635 if (! shmedia_space_reserved_for_target_registers)
9637 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9639 if (calc_live_regs (&dummy) >= 6 * 8)
9645 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9647 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9651 On the SH1..SH4, the trampoline looks like
9652 2 0002 D202 mov.l l2,r2
9653 1 0000 D301 mov.l l1,r3
9656 5 0008 00000000 l1: .long area
9657 6 000c 00000000 l2: .long function
9659 SH5 (compact) uses r1 instead of r3 for the static chain. */
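/* Concretely (illustration): the SH1..SH4 initialization code near the
   end of sh_initialize_trampoline below stores the insn pair
   0xd301 (mov.l l1,r3) / 0xd202 (mov.l l2,r2) as one SImode word at
   offset 0, the pair 0x422b (jmp @r2) / 0x0009 (nop) at offset 4, then
   the static chain value at offset 8 (l1) and the function address at
   offset 12 (l2).  */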
9662 /* Emit RTL insns to initialize the variable parts of a trampoline.
9663 FNADDR is an RTX for the address of the function's pure code.
9664 CXT is an RTX for the static chain value for the function. */
9667 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9669 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9671 if (TARGET_SHMEDIA64)
9676 rtx movi1 = GEN_INT (0xcc000010);
9677 rtx shori1 = GEN_INT (0xc8000010);
9680 /* The following trampoline works within a +- 128 KB range for cxt:
9681 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9682 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9683 gettr tr1,r1; blink tr0,r63 */
9684 /* Address rounding makes it hard to compute the exact bounds of the
9685 offset for this trampoline, but we have a rather generous offset
9686 range, so frame_offset should do fine as an upper bound. */
9687 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9689 /* ??? could optimize this trampoline initialization
9690 by writing DImode words with two insns each. */
9691 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9692 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9693 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9694 insn = gen_rtx_AND (DImode, insn, mask);
9695 /* OR in the ptb/u .,tr1 pattern. */
9696 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9697 insn = force_operand (insn, NULL_RTX);
9698 insn = gen_lowpart (SImode, insn);
9699 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9700 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9701 insn = gen_rtx_AND (DImode, insn, mask);
9702 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9703 insn = gen_lowpart (SImode, insn);
9704 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9705 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9706 insn = gen_rtx_AND (DImode, insn, mask);
9707 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9708 insn = gen_lowpart (SImode, insn);
9709 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9710 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9711 insn = gen_rtx_AND (DImode, insn, mask);
9712 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9713 insn = gen_lowpart (SImode, insn);
9714 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9715 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9716 insn = gen_rtx_AND (DImode, insn, mask);
9717 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9718 insn = gen_lowpart (SImode, insn);
9719 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9720 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9721 GEN_INT (0x6bf10600));
9722 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9723 GEN_INT (0x4415fc10));
9724 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9725 GEN_INT (0x4401fff0));
9726 emit_insn (gen_ic_invalidate_line (tramp));
9729 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9730 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9732 tramp_templ = gen_datalabel_ref (tramp_templ);
9734 src = gen_const_mem (BLKmode, tramp_templ);
9735 set_mem_align (dst, 256);
9736 set_mem_align (src, 64);
9737 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9739 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9740 emit_move_insn (adjust_address (tramp_mem, Pmode,
9741 fixed_len + GET_MODE_SIZE (Pmode)),
9743 emit_insn (gen_ic_invalidate_line (tramp));
9746 else if (TARGET_SHMEDIA)
9748 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9749 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9750 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9751 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9752 /* movi 0,r1 (0xcc000010) and shori 0,r1 (0xc8000010) concatenated,
9753 rotated right by 10, with the higher 16 bits of every 32 selected. */
9755 = force_reg (V2HImode, (simplify_gen_subreg
9756 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9757 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9758 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9760 tramp = force_reg (Pmode, tramp);
9761 fnaddr = force_reg (SImode, fnaddr);
9762 cxt = force_reg (SImode, cxt);
9763 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9764 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9766 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9767 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9768 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9769 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9770 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9771 gen_rtx_SUBREG (V2HImode, cxt, 0),
9773 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9774 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9775 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9776 if (TARGET_LITTLE_ENDIAN)
9778 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9779 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9783 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9784 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9786 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9787 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9788 emit_insn (gen_ic_invalidate_line (tramp));
9791 else if (TARGET_SHCOMPACT)
9793 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9796 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9797 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9799 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9800 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9802 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9803 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9806 if (!TARGET_INLINE_IC_INVALIDATE
9807 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9808 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9810 0, VOIDmode, 1, tramp, SImode);
9812 emit_insn (gen_ic_invalidate_line (tramp));
9816 /* FIXME: This is overly conservative. A SHcompact function that
9817 receives arguments ``by reference'' will have them stored in its
9818 own stack frame, so it must not pass pointers or references to
9819 these arguments to other functions by means of sibling calls. */
9820 /* If PIC, we cannot make sibling calls to global functions
9821 because the PLT requires r12 to be live. */
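/* Example (illustration, not from the original source): compiling

     extern int g (int);
     int f (int x) { return g (x); }

   with -fPIC, g is TREE_PUBLIC with default visibility, so the test
   below rejects the sibcall and a normal PLT call is emitted; a static
   or hidden-visibility g could still be tail-called.  */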
9823 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9826 && (! TARGET_SHCOMPACT
9827 || crtl->args.info.stack_regs == 0)
9828 && ! sh_cfun_interrupt_handler_p ()
9830 || (decl && ! TREE_PUBLIC (decl))
9831 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9834 /* Machine specific built-in functions. */
9836 struct builtin_description
9838 const enum insn_code icode;
9839 const char *const name;
9843 /* Describe the number and signedness of arguments; arg[0] == result
9844 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9845 /* 9: 64-bit pointer, 10: 32-bit pointer */
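/* Decoding example (illustration, with hypothetical entries): an entry
   { 2, 2, 2, 0 } would describe a builtin returning a signed value and
   taking two signed arguments, while { 0, 8, 0, 0 } would describe a
   void builtin taking a single pointer argument.  */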
9846 static const char signature_args[][4] =
9848 #define SH_BLTIN_V2SI2 0
9850 #define SH_BLTIN_V4HI2 1
9852 #define SH_BLTIN_V2SI3 2
9854 #define SH_BLTIN_V4HI3 3
9856 #define SH_BLTIN_V8QI3 4
9858 #define SH_BLTIN_MAC_HISI 5
9860 #define SH_BLTIN_SH_HI 6
9862 #define SH_BLTIN_SH_SI 7
9864 #define SH_BLTIN_V4HI2V2SI 8
9866 #define SH_BLTIN_V4HI2V8QI 9
9868 #define SH_BLTIN_SISF 10
9870 #define SH_BLTIN_LDUA_L 11
9872 #define SH_BLTIN_LDUA_Q 12
9874 #define SH_BLTIN_STUA_L 13
9876 #define SH_BLTIN_STUA_Q 14
9878 #define SH_BLTIN_LDUA_L64 15
9880 #define SH_BLTIN_LDUA_Q64 16
9882 #define SH_BLTIN_STUA_L64 17
9884 #define SH_BLTIN_STUA_Q64 18
9886 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9887 #define SH_BLTIN_2 19
9888 #define SH_BLTIN_SU 19
9890 #define SH_BLTIN_3 20
9891 #define SH_BLTIN_SUS 20
9893 #define SH_BLTIN_PSSV 21
9895 #define SH_BLTIN_XXUU 22
9896 #define SH_BLTIN_UUUU 22
9898 #define SH_BLTIN_PV 23
9901 /* mcmv: operands considered unsigned. */
9902 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9903 /* mperm: control value considered unsigned int. */
9904 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9905 /* mshards_q: returns signed short. */
9906 /* nsb: takes long long arg, returns unsigned char. */
9907 static const struct builtin_description bdesc[] =
9909 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9910 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9911 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9912 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9913 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9914 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9915 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9916 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9917 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9918 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9919 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9920 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9921 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9922 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9923 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9924 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9925 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9926 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9927 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9928 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9929 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9930 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9931 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9932 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9933 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9934 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9935 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9936 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9937 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9938 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9939 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9940 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9941 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9942 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9943 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9944 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9945 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9946 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9947 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9948 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9949 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9950 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9951 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9952 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9953 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9954 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9955 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9956 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9957 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9958 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9959 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9960 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9961 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9962 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9963 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9964 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9965 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9966 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9967 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9968 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9969 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9970 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9971 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9972 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9973 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9974 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9975 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9976 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9977 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9978 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9979 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9980 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9981 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9982 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9983 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9984 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9985 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9986 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9987 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9988 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9989 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9990 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9991 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9992 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9996 sh_media_init_builtins (void)
9998 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9999 const struct builtin_description *d;
10001 memset (shared, 0, sizeof shared);
10002 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10004 tree type, arg_type = 0;
10005 int signature = d->signature;
10008 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10009 type = shared[signature];
10012 int has_result = signature_args[signature][0] != 0;
10014 if ((signature_args[signature][1] & 8)
10015 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10016 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10018 if (! TARGET_FPU_ANY
10019 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10021 type = void_list_node;
10024 int arg = signature_args[signature][i];
10025 int opno = i - 1 + has_result;
10028 arg_type = ptr_type_node;
10030 arg_type = (*lang_hooks.types.type_for_mode)
10031 (insn_data[d->icode].operand[opno].mode,
10036 arg_type = void_type_node;
10039 type = tree_cons (NULL_TREE, arg_type, type);
10041 type = build_function_type (arg_type, type);
10042 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10043 shared[signature] = type;
10045 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10050 /* Implements target hook vector_mode_supported_p. */
10052 sh_vector_mode_supported_p (enum machine_mode mode)
10055 && ((mode == V2SFmode)
10056 || (mode == V4SFmode)
10057 || (mode == V16SFmode)))
10060 else if (TARGET_SHMEDIA
10061 && ((mode == V8QImode)
10062 || (mode == V2HImode)
10063 || (mode == V4HImode)
10064 || (mode == V2SImode)))
10070 /* Implements target hook dwarf_calling_convention. Return an enum
10071 of dwarf_calling_convention. */
10073 sh_dwarf_calling_convention (const_tree func)
10075 if (sh_attr_renesas_p (func))
10076 return DW_CC_GNU_renesas_sh;
10078 return DW_CC_normal;
10082 sh_init_builtins (void)
10084 if (TARGET_SHMEDIA)
10085 sh_media_init_builtins ();
10088 /* Expand an expression EXP that calls a built-in function,
10089 with result going to TARGET if that's convenient
10090 (and in mode MODE if that's convenient).
10091 SUBTARGET may be used as the target for computing one of EXP's operands.
10092 IGNORE is nonzero if the value is to be ignored. */
10095 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10096 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10098 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10099 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10100 const struct builtin_description *d = &bdesc[fcode];
10101 enum insn_code icode = d->icode;
10102 int signature = d->signature;
10103 enum machine_mode tmode = VOIDmode;
10108 if (signature_args[signature][0])
10113 tmode = insn_data[icode].operand[0].mode;
10115 || GET_MODE (target) != tmode
10116 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10117 target = gen_reg_rtx (tmode);
10118 op[nop++] = target;
10123 for (i = 1; i <= 3; i++, nop++)
10126 enum machine_mode opmode, argmode;
10129 if (! signature_args[signature][i])
10131 arg = CALL_EXPR_ARG (exp, i - 1);
10132 if (arg == error_mark_node)
10134 if (signature_args[signature][i] & 8)
10137 optype = ptr_type_node;
10141 opmode = insn_data[icode].operand[nop].mode;
10142 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10144 argmode = TYPE_MODE (TREE_TYPE (arg));
10145 if (argmode != opmode)
10146 arg = build1 (NOP_EXPR, optype, arg);
10147 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
10148 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10149 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10155 pat = (*insn_data[d->icode].genfun) (op[0]);
10158 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10161 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10164 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10167 gcc_unreachable ();
10176 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10178 rtx sel0 = const0_rtx;
10179 rtx sel1 = const1_rtx;
10180 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10181 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10183 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10184 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10188 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10190 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10192 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10193 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10196 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10197 We can allow any mode in any general register. The special registers
10198 only allow SImode. Don't allow any mode in the PR.
10200 We cannot hold DCmode values in the XD registers because alter_reg
10201 handles subregs of them incorrectly. We could work around this by
10202 spacing the XD registers like the DR registers, but this would require
10203 additional memory in every compilation to hold larger register vectors.
10204 We could hold SFmode / SCmode values in XD registers, but that
10205 would require a tertiary reload when reloading from / to memory,
10206 and a secondary reload to reload from / to general regs; that
10207 seems to be a losing proposition.
10209 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10210 it won't be ferried through GP registers first. */
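/* For example (illustration): the checks below accept DFmode in the FP
   registers only on an even register pair, ((regno - FIRST_FP_REG) & 1)
   == 0, and TImode only on a four-register boundary, ((regno -
   FIRST_FP_REG) & 3) == 0.  */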
10213 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10215 if (SPECIAL_REGISTER_P (regno))
10216 return mode == SImode;
10218 if (regno == FPUL_REG)
10219 return (mode == SImode || mode == SFmode);
10221 if (FP_REGISTER_P (regno) && mode == SFmode)
10224 if (mode == V2SFmode)
10226 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10227 || GENERAL_REGISTER_P (regno)))
10233 if (mode == V4SFmode)
10235 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10236 || GENERAL_REGISTER_P (regno))
10242 if (mode == V16SFmode)
10244 if (TARGET_SHMEDIA)
10246 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10252 return regno == FIRST_XD_REG;
10255 if (FP_REGISTER_P (regno))
10259 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10260 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10263 && (mode == DFmode || mode == DImode
10264 || mode == V2SFmode || mode == TImode)))
10265 && ((regno - FIRST_FP_REG) & 1) == 0)
10266 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10267 && ((regno - FIRST_FP_REG) & 3) == 0))
10273 if (XD_REGISTER_P (regno))
10274 return mode == DFmode;
10276 if (TARGET_REGISTER_P (regno))
10277 return (mode == DImode || mode == SImode || mode == PDImode);
10279 if (regno == PR_REG)
10280 return mode == SImode;
10282 if (regno == FPSCR_REG)
10283 return mode == PSImode;
10285 /* FIXME. This works around PR target/37633 for -O0. */
10286 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10288 unsigned int n = GET_MODE_SIZE (mode) / 8;
10290 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10291 && regno <= FIRST_GENERAL_REG + 14)
10298 /* Return the class of registers for which a mode change from FROM to TO
10301 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10302 enum reg_class rclass)
10304 /* We want to enable the use of SUBREGs as a means to
10305 VEC_SELECT a single element of a vector. */
10306 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10307 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10309 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10311 if (TARGET_LITTLE_ENDIAN)
10313 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10314 return reg_classes_intersect_p (DF_REGS, rclass);
10318 if (GET_MODE_SIZE (from) < 8)
10319 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10326 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10327 that label is used. */
10330 sh_mark_label (rtx address, int nuses)
10332 if (GOTOFF_P (address))
10334 /* Extract the label or symbol. */
10335 address = XEXP (address, 0);
10336 if (GET_CODE (address) == PLUS)
10337 address = XEXP (address, 0);
10338 address = XVECEXP (address, 0, 0);
10340 if (GET_CODE (address) == LABEL_REF
10341 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10342 LABEL_NUSES (XEXP (address, 0)) += nuses;
10345 /* Compute extra cost of moving data between one register class
10348 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10349 uses this information. Hence, the general register <-> floating point
10350 register information here is not used for SFmode. */
10353 sh_register_move_cost (enum machine_mode mode,
10354 enum reg_class srcclass, enum reg_class dstclass)
10356 if (dstclass == T_REGS || dstclass == PR_REGS)
10359 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10362 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10363 && REGCLASS_HAS_FP_REG (srcclass)
10364 && REGCLASS_HAS_FP_REG (dstclass))
10367 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10368 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10370 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10371 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10374 if ((REGCLASS_HAS_FP_REG (dstclass)
10375 && REGCLASS_HAS_GENERAL_REG (srcclass))
10376 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10377 && REGCLASS_HAS_FP_REG (srcclass)))
10378 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10379 * ((GET_MODE_SIZE (mode) + 7) / 8U));
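/* Worked example (illustration): for a DFmode value,
   GET_MODE_SIZE (mode) is 8, so the multiplier is (8 + 7) / 8 = 1 and a
   general <-> FP register move costs 12 on SH4 without -mfmovd, 8 with
   TARGET_FMOVD, and 4 on SHmedia.  */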
10381 if ((dstclass == FPUL_REGS
10382 && REGCLASS_HAS_GENERAL_REG (srcclass))
10383 || (srcclass == FPUL_REGS
10384 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10387 if ((dstclass == FPUL_REGS
10388 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10389 || (srcclass == FPUL_REGS
10390 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10393 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10394 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10397 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10399 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10401 if (sh_gettrcost >= 0)
10402 return sh_gettrcost;
10403 else if (!TARGET_PT_FIXED)
10407 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10408 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10413 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10414 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10415 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10417 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10420 static rtx emit_load_ptr (rtx, rtx);
10423 emit_load_ptr (rtx reg, rtx addr)
10425 rtx mem = gen_const_mem (ptr_mode, addr);
10427 if (Pmode != ptr_mode)
10428 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10429 return emit_move_insn (reg, mem);
10433 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10434 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10437 CUMULATIVE_ARGS cum;
10438 int structure_value_byref = 0;
10439 rtx this_rtx, this_value, sibcall, insns, funexp;
10440 tree funtype = TREE_TYPE (function);
10441 int simple_add = CONST_OK_FOR_ADD (delta);
10443 rtx scratch0, scratch1, scratch2;
10446 reload_completed = 1;
10447 epilogue_completed = 1;
10448 current_function_uses_only_leaf_regs = 1;
10450 emit_note (NOTE_INSN_PROLOGUE_END);
10452 /* Find the "this" pointer. We have such a wide range of ABIs for the
10453 SH that it's best to do this completely machine independently.
10454 "this" is passed as first argument, unless a structure return pointer
10455 comes first, in which case "this" comes second. */
10456 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10457 #ifndef PCC_STATIC_STRUCT_RETURN
10458 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10459 structure_value_byref = 1;
10460 #endif /* not PCC_STATIC_STRUCT_RETURN */
10461 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10463 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10465 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10467 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10469 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10470 static chain pointer (even if you can't have nested virtual functions
10471 right now, someone might implement them sometime), and the rest of the
10472 registers are used for argument passing, are callee-saved, or reserved. */
10473 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10474 -ffixed-reg has been used. */
10475 if (! call_used_regs[0] || fixed_regs[0])
10476 error ("r0 needs to be available as a call-clobbered register");
10477 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10480 if (call_used_regs[1] && ! fixed_regs[1])
10481 scratch1 = gen_rtx_REG (ptr_mode, 1);
10482 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10483 to the location where struct values are returned. */
10484 if (call_used_regs[3] && ! fixed_regs[3])
10485 scratch2 = gen_rtx_REG (Pmode, 3);
10487 else if (TARGET_SHMEDIA)
10489 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10490 if (i != REGNO (scratch0)
10491 && call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10493 scratch1 = gen_rtx_REG (ptr_mode, i);
10496 if (scratch1 == scratch0)
10497 error ("need a second call-clobbered general purpose register");
10498 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10499 if (call_used_regs[i] && ! fixed_regs[i])
10501 scratch2 = gen_rtx_REG (Pmode, i);
10504 if (scratch2 == scratch0)
10505 error ("need a call-clobbered target register");
10508 this_value = plus_constant (this_rtx, delta);
10510 && (simple_add || scratch0 != scratch1)
10511 && strict_memory_address_p (ptr_mode, this_value))
10513 emit_load_ptr (scratch0, this_value);
10518 ; /* Do nothing. */
10519 else if (simple_add)
10520 emit_move_insn (this_rtx, this_value);
10523 emit_move_insn (scratch1, GEN_INT (delta));
10524 emit_insn (gen_add2_insn (this_rtx, scratch1));
10532 emit_load_ptr (scratch0, this_rtx);
10534 offset_addr = plus_constant (scratch0, vcall_offset);
10535 if (strict_memory_address_p (ptr_mode, offset_addr))
10536 ; /* Do nothing. */
10537 else if (! TARGET_SH5 && scratch0 != scratch1)
10539 /* scratch0 != scratch1, and we have indexed loads. Get better
10540 schedule by loading the offset into r1 and using an indexed
10541 load - then the load of r1 can issue before the load from
10542 (this_rtx + delta) finishes. */
10543 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10544 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10546 else if (CONST_OK_FOR_ADD (vcall_offset))
10548 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10549 offset_addr = scratch0;
10551 else if (scratch0 != scratch1)
10553 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10554 emit_insn (gen_add2_insn (scratch0, scratch1));
10555 offset_addr = scratch0;
10558 gcc_unreachable (); /* FIXME */
10559 emit_load_ptr (scratch0, offset_addr);
10561 if (Pmode != ptr_mode)
10562 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10563 emit_insn (gen_add2_insn (this_rtx, scratch0));
10566 /* Generate a tail call to the target function. */
10567 if (! TREE_USED (function))
10569 assemble_external (function);
10570 TREE_USED (function) = 1;
10572 funexp = XEXP (DECL_RTL (function), 0);
10573 /* If the function is overridden, so is the thunk, hence we don't
10574 need GOT addressing even if this is a public symbol. */
10576 if (TARGET_SH1 && ! flag_weak)
10577 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10580 if (TARGET_SH2 && flag_pic)
10582 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10583 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10587 if (TARGET_SHMEDIA && flag_pic)
10589 funexp = gen_sym2PIC (funexp);
10590 PUT_MODE (funexp, Pmode);
10592 emit_move_insn (scratch2, funexp);
10593 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10594 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10596 sibcall = emit_call_insn (sibcall);
10597 SIBLING_CALL_P (sibcall) = 1;
10598 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10601 /* Run just enough of rest_of_compilation to do scheduling and get
10602 the insns emitted. Note that use_thunk calls
10603 assemble_start_function and assemble_end_function. */
10605 insn_locators_alloc ();
10606 insns = get_insns ();
10611 /* Initialize the bitmap obstacks. */
10612 bitmap_obstack_initialize (NULL);
10613 bitmap_obstack_initialize (&reg_obstack);
10616 rtl_register_cfg_hooks ();
10617 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10618 init_rtl_bb_info (EXIT_BLOCK_PTR);
10619 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10620 EXIT_BLOCK_PTR->flags |= BB_RTL;
10621 find_basic_blocks (insns);
10623 if (flag_schedule_insns_after_reload)
10625 life_analysis (PROP_FINAL);
10627 split_all_insns (1);
10631 /* We must split jmp insn in PIC case. */
10633 split_all_insns_noflow ();
10640 split_all_insns_noflow ();
10646 if (optimize > 0 && flag_delayed_branch)
10647 dbr_schedule (insns);
10649 shorten_branches (insns);
10650 final_start_function (insns, file, 1);
10651 final (insns, file, 1);
10652 final_end_function ();
10653 free_after_compilation (cfun);
10655 reload_completed = 0;
10656 epilogue_completed = 0;
10660 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10664 /* If this is not an ordinary function, the name usually comes from a
10665 string literal or an sprintf buffer. Make sure we use the same
10666 string consistently, so that cse will be able to unify address loads. */
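/* Example (illustration, with a hypothetical name): two separate calls
   that both pass the literal "__example_sfunc" with a kind other than
   FUNCTION_ORDINARY get the same interned IDENTIFIER_POINTER string, so
   the resulting SYMBOL_REFs compare equal and cse can share a single
   address load.  */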
10667 if (kind != FUNCTION_ORDINARY)
10668 name = IDENTIFIER_POINTER (get_identifier (name));
10669 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10670 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10674 case FUNCTION_ORDINARY:
10678 rtx reg = target ? target : gen_reg_rtx (Pmode);
10680 emit_insn (gen_symGOT2reg (reg, sym));
10686 /* ??? To allow cse to work, we use GOTOFF relocations.
10687 We could add combiner patterns to transform this into
10688 straight pc-relative calls with sym2PIC / bsrf when
10689 label load and function call are still 1:1 and in the
10690 same basic block during combine. */
10691 rtx reg = target ? target : gen_reg_rtx (Pmode);
10693 emit_insn (gen_symGOTOFF2reg (reg, sym));
10698 if (target && sym != target)
10700 emit_move_insn (target, sym);
10706 /* Find the number of a general purpose register in S. */
10708 scavenge_reg (HARD_REG_SET *s)
10711 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10712 if (TEST_HARD_REG_BIT (*s, r))
10718 sh_get_pr_initial_val (void)
10722 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10723 PR register on SHcompact, because it might be clobbered by the prologue.
10724 We check first if that is known to be the case. */
10725 if (TARGET_SHCOMPACT
10726 && ((crtl->args.info.call_cookie
10727 & ~ CALL_COOKIE_RET_TRAMP (1))
10728 || crtl->saves_all_registers))
10729 return gen_frame_mem (SImode, return_address_pointer_rtx);
10731 /* If we haven't finished rtl generation, there might be a nonlocal label
10732 that we haven't seen yet.
10733 ??? get_hard_reg_initial_val fails if it is called after register
10734 allocation has started, unless it has been called before for the
10735 same register. And even then, we end up in trouble if we didn't use
10736 the register in the same basic block before. So call
10737 get_hard_reg_initial_val now and wrap it in an unspec if we might
10738 need to replace it. */
10739 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10740 combine can put the pseudo returned by get_hard_reg_initial_val into
10741 instructions that need a general purpose register, which will fail to
10742 be recognized when the pseudo becomes allocated to PR. */
10744 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10746 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10751 sh_expand_t_scc (enum rtx_code code, rtx target)
10753 rtx result = target;
10756 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10757 || GET_CODE (sh_compare_op1) != CONST_INT)
10759 if (GET_CODE (result) != REG)
10760 result = gen_reg_rtx (SImode);
10761 val = INTVAL (sh_compare_op1);
10762 if ((code == EQ && val == 1) || (code == NE && val == 0))
10763 emit_insn (gen_movt (result));
10764 else if (TARGET_SH2A && ((code == EQ && val == 0)
10765 || (code == NE && val == 1)))
10766 emit_insn (gen_movrt (result));
10767 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10769 emit_clobber (result);
10770 emit_insn (gen_subc (result, result, result));
10771 emit_insn (gen_addsi3 (result, result, const1_rtx));
10773 else if (code == EQ || code == NE)
10774 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10777 if (result != target)
10778 emit_move_insn (target, result);
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}
/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
	break;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}
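
/* Illustrative check, not from the original sources: 2**15 / pi
   = 32768 / 3.14159...  = 10430.378350470453, so e.g. an angle of
   pi/2 radians scales to (pi/2) * 10430.378...  = 16384 = 0x4000,
   a quarter of the 0x10000 full circle, as expected.  */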
/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}
/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to a SFmode value, i.e., 0x10000 maps to
   2*pi.  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}
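
/* Illustrative check, not from the original sources: pi / 2**15
   = 9.587379924285257e-5 is the reciprocal of the 2**15 / pi constant
   above (10430.378350470453 * 9.587379924285257e-5 == 1.0), so scaling
   an angle to the fixed-point fraction and back is an identity up to
   rounding.  */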
/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    ((TARGET_DEFAULT & MASK_HITACHI)
	     && (mode == BLKmode
		 || (GET_MODE_SIZE (mode) > 4
		     && !(mode == DFmode
			  && TARGET_FPU_DOUBLE))));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}
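
/* Illustrative note, not from the original sources: under the synthesized
   check above, a libcall returning DImode (8 bytes, > 4) is assumed to
   return in memory under the Renesas ABI, so force_mem is set, while a
   DFmode result stays in registers when TARGET_FPU_DOUBLE is in effect.  */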
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */
rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents a loop when we change a MEM in a CONST_DOUBLE
     onto the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
					n_replacements, modify);

      if (GET_CODE (new_rtx) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new_rtx,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new_rtx;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
					n_replacements, modify);

      if (GET_CODE (new_rtx) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new_rtx, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new_rtx;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new_rtx;

      if (fmt[i] == 'e')
	{
	  new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
					n_replacements, modify);
	  if (!new_rtx)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new_rtx;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
					  n_replacements, modify);
	    if (!new_rtx)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new_rtx;
	  }
    }

  return x;
}
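
/* Usage sketch, not from the original sources: to substitute two hard
   registers at once, the pairs are laid out as { FROM(0), TO(0),
   FROM(1), TO(1) }, e.g.:

     rtx map[4] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 6),
		    gen_rtx_REG (SImode, 5), gen_rtx_REG (SImode, 7) };
     pat = replace_n_hard_rtx (pat, map, 2, 1);

   With MODIFY == 0 nothing is rewritten in place; a NULL_RTX result
   signals an ambiguous replacement.  */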
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}
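
/* Illustrative example, not from the original sources: asking for an
   HImode truncate of (sign_extend:DI (reg:QI)) with NEED_SIGN_EXT set
   takes the last branch above and yields (sign_extend:HI (reg:QI)),
   re-extending the narrower inner value instead of truncating the wide
   extension.  */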
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      /* Returning -1 tells for_each_rtx not to traverse the
	 sub-expressions of the rewritten TRUNCATE.  */
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return 1;
    }

  return 0;
}
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
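
/* Illustrative note, not from the original sources: ptabs moves the
   prepared address into a PDImode branch-target register and can trap
   on an invalid pointer; emitting it explicitly here, rather than
   leaving it implicit in the call pattern, makes the potential trap
   visible to the rest of the compiler, as the comment above says.  */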
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (rclass)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (rclass == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (rclass == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (rclass)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && rclass == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((rclass == TARGET_REGS
       || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (rclass != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
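
/* Illustrative note, not from the original sources: as an example of the
   input-reload logic above, loading the constant 42 into FPUL returns
   GENERAL_REGS, since 42 satisfies the I08 constraint and can be moved
   through a general register, whereas an SImode constant like 300 falls
   through to the reload_insi__i_fpul pattern via sri->icode.  */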
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;