/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
#include "tm-constrs.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
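
/* For instance, with adjust_address (x, SImode, 4 * LSW) as used below,
   a little-endian target (LSW == 0) reads the least significant word of
   a 64-bit value at byte offset 0, while a big-endian target (LSW == 1)
   reads it at byte offset 4; MSW selects the other word.  */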
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
                  : DECL_ATTRIBUTES (decl) \
                    ? (DECL_ATTRIBUTES (decl)) \
                    : TYPE_ATTRIBUTES (TREE_TYPE (decl))
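
/* In other words: a type node yields its TYPE_ATTRIBUTES directly; a
   decl yields its own DECL_ATTRIBUTES when that list is non-null, and
   otherwise falls back to the attributes of the decl's type.  */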
/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
                                                 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
                                                           tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
                              const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for re-enabling sched1.  With the
   help of these hooks we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   the find_insn_reg_weights function call.  It is used to calculate the
   SImode and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
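
/* Taken together, the intended flow per scheduling region is:
   sh_md_init_global computes the SImode/SFmode weight of every insn,
   sh_md_init clears the pressure counters, sh_reorder/sh_reorder2
   reorder the ready list by LUID once the pressure counters cross the
   thresholds, and sh_dfa_new_cycle then skips up to 8 cycles so that a
   register-freeing insn stalled in Q can reach R and be issued.  */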
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE)  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE)  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
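
/* For instance, CURR_REGMODE_PRESSURE (SImode) selects
   curr_regmode_pressure[0] and any other mode (SFmode here) selects
   index 1, mirroring the regmode_weight array layout.  */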
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload
/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
                  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            gcc_unreachable ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
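
/* As a worked example, the branch templates later in this file use
   several of these codes: in "bt%.\t%l0", '%.' prints ".s" (or "/s" in
   the alternate assembler dialect) when the branch's delay slot is
   filled, and '%@' prints the matching return instruction (trapa,
   rte or rts) for the current function.  */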
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
                                     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
        fprintf (stream, "trapa #%ld",
                 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
        {
          if (sh_cfun_resbank_handler_p ())
            fprintf (stream, "resbank\n");
          fprintf (stream, "rte");
        }
      else
        fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);
        break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
        {
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));
        }
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
         For SHMEDIA, that means they can be used to access the first and
         second 32 bit part of a 64 bit (or larger) value that
         might be held in floating point registers or memory.
         While they can be used to access 64 bit parts of a larger value
         held in general purpose registers, that won't work with memory -
         neither for fp registers, since the frxx names are used.  */
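      /* For example, given a little-endian DFmode operand held in memory,
         "%R0" prints the address of its low 32-bit word (offset 0) and
         "%S0" the address of its high word (offset 4); for a register
         pair, the corresponding register name is printed instead.  */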
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 1 : LSW;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * LSW);
          print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * LSW);
          if (sub)
            print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%R");
        }
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 0 : MSW;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * MSW);
          print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * MSW);
          if (sub)
            print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%S");
        }
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;
    case 't':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          print_operand (stream, x, 0);
          break;
        default:
          break;
        }
      break;
    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;
        default:
          break;
        }
      break;
    case 'M':
      if (TARGET_SHMEDIA)
        {
          if (GET_CODE (x) == MEM
              && GET_CODE (XEXP (x, 0)) == PLUS
              && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
                  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
            fputc ('x', stream);
        }
      else
        {
          if (GET_CODE (x) == MEM)
            {
              switch (GET_MODE (x))
                {
                case QImode: fputs (".b", stream); break;
                case HImode: fputs (".w", stream); break;
                case SImode: fputs (".l", stream); break;
                case SFmode: fputs (".s", stream); break;
                case DFmode: fputs (".d", stream); break;
                default: gcc_unreachable ();
                }
            }
        }
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          print_operand (stream, x, 0);
          fputs (", 0", stream);
          break;

        case PLUS:
          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case 'V':
      {
        int num = exact_log2 (INTVAL (x));
        gcc_assert (num >= 0);
        fprintf (stream, "#%d", num);
        break;
      }

    case 'W':
      {
        int num = exact_log2 (~INTVAL (x));
        gcc_assert (num >= 0);
        fprintf (stream, "#%d", num);
        break;
      }

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
        {
          fprintf ((stream), "r63");
          break;
        }
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
          break;
        }
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
        {
        case TRUNCATE:
          {
            rtx inner = XEXP (x, 0);
            int offset = 0;
            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (GET_CODE (inner) == CONST_INT)
              {
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
                goto default_output;
              }
            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && GET_CODE (SUBREG_REG (inner)) == REG)
              {
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                                              SUBREG_BYTE (inner),
                                              GET_MODE (inner));
                inner = SUBREG_REG (inner);
              }
            if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
              abort ();
            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))
                    + offset;
            x = inner;
            goto reg;
          }
        case SIGN_EXTEND:
          x = XEXP (x, 0);
          goto reg;
          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
        case PRE_DEC:
          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);
          goto default_output;
        case SUBREG:
          gcc_assert (SUBREG_BYTE (x) == 0
                      && GET_CODE (SUBREG_REG (x)) == REG);

          x = SUBREG_REG (x);
          /* Fall through.  */

        reg:
        case REG:
          regno += REGNO (x);
          if (FP_REGISTER_P (regno)
              && mode == V16SFmode)
            fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && mode == V4SFmode)
            fprintf ((stream), "fv%s", reg_names[regno] + 2);
          else if (GET_CODE (x) == REG
                   && mode == V2SFmode)
            fprintf ((stream), "fp%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE_SIZE (mode) > 4)
            fprintf ((stream), "d%s", reg_names[regno] + 1);
          else
            fputs (reg_names[regno], (stream));
          break;

        case MEM:
          output_address (XEXP (x, 0));
          break;

        case CONST:
          if (TARGET_SHMEDIA
              && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
              && (GET_MODE (XEXP (x, 0)) == DImode
                  || GET_MODE (XEXP (x, 0)) == SImode)
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
              && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
            {
              rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
              rtx val2 = val;
              bool nested_expr = false;

              fputc ('(', stream);
              if (GET_CODE (val) == ASHIFTRT)
                {
                  fputc ('(', stream);
                  val2 = XEXP (val, 0);
                }
              if (GET_CODE (val2) == CONST
                  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
                {
                  fputc ('(', stream);
                  nested_expr = true;
                }
              output_addr_const (stream, val2);
              if (nested_expr)
                fputc (')', stream);
              if (GET_CODE (val) == ASHIFTRT)
                {
                  fputs (" >> ", stream);
                  output_addr_const (stream, XEXP (val, 1));
                  fputc (')', stream);
                }
              fputs (" & 65535)", stream);
              break;
            }
          /* Fall through.  */
        default:
          fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
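
/* For example, a constant 12-byte, 4-byte-aligned copy on TARGET_HARD_SH4
   compiles to a call to the __movmemSI12_i4 library routine below, with
   the destination address forced into r4 and the source address into r5.  */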
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
        {
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, BLKmode,
                                                src_addr, copied);

          set_mem_size (from, GEN_INT (4));
          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (src_addr, 4));
          emit_move_insn (to, temp);
          copied += 4;
        }

      if (copied < bytes)
        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                                                   src_addr, copied),
                        bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return 0;
      else if (bytes == 12)
        {
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);

          function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }
      else if (! TARGET_SMALLCODE)
        {
          const char *entry_name;
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          int dwords;
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);
          rtx r6 = gen_rtx_REG (SImode, 6);

          entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
          function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return 1;
        }
      else
        return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
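      /* Checking that example against the code below: 72 bytes is 18
         words, so final_switch = 16 - (18 % 16) = 14 and
         while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 0 + 14 = 14;
         the first pass through the loop leaves 14 - 16 = -2 for the
         final switch.  */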
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))
            /* It's ok.  */;
          else
            {
              temp = (!can_create_pseudo_p ()
                      ? operands[0]
                      : gen_reg_rtx (Pmode));
              operands[1] = legitimize_pic_address (operands[1], mode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         mode, temp);
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      (!can_create_pseudo_p ()
                                       ? temp
                                       : gen_reg_rtx (Pmode)),
                                      0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
        {
          /* This is like change_address_1 (operands[0], mode, 0, 1) ,
             except that we can't use that function because it is static.  */
          rtx new = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new, operands[0]);
          operands[0] = new;
        }

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (TARGET_SH1
               && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
               && GET_CODE (operands[0]) == MEM
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
        {
          opc = XEXP (XEXP (op1, 0), 1);
          op1 = XEXP (XEXP (op1, 0), 0);
        }
      else
        opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
        {
          rtx tga_op1, tga_ret, tmp, tmp2;

          switch (tls_kind)
            {
            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
              op1 = tga_ret;
              break;

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))
                tmp2 = op0;
              else
                tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
              op1 = tmp2;
              break;

            case TLS_MODEL_INITIAL_EXEC:
              if (! flag_pic)
                {
                  /* Don't schedule insns for getting GOT address when
                     the first scheduling is enabled, to avoid spill
                     failures for R0.  */
                  if (flag_schedule_insns)
                    emit_insn (gen_blockage ());
                  emit_insn (gen_GOTaddr2picreg ());
                  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
                                                                 PIC_REG)));
                  if (flag_schedule_insns)
                    emit_insn (gen_blockage ());
                }
              tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));
              op1 = tga_op1;
              break;

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_load_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))
                op1 = op0;
              else
                op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));
              break;

            default:
              gcc_unreachable ();
            }
          if (opc)
            emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
          operands[1] = op1;
        }
    }

  return 0;
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
                          enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
          && (comparison == GT || comparison == LE))
        {
          comparison = (comparison == GT) ? GE : LT;
          operands[2] = gen_int_mode (val + 1, mode);
        }
      else if ((val == 1 || val == 0x80)
               && (comparison == GE || comparison == LT))
        {
          comparison = (comparison == GE) ? GT : LE;
          operands[2] = gen_int_mode (val - 1, mode);
        }
      else if (val == 1 && (comparison == GEU || comparison == LTU))
        {
          comparison = (comparison == GEU) ? NE : EQ;
          operands[2] = CONST0_RTX (mode);
        }
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
        {
          comparison = (comparison == GEU) ? GTU : LEU;
          operands[2] = gen_int_mode (val - 1, mode);
        }
      else if (val == 0 && (comparison == GTU || comparison == LEU))
        comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
               && ((val == 0x7fffffff
                    && (comparison == GTU || comparison == LEU))
                   || ((unsigned HOST_WIDE_INT) val
                       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
                       && (comparison == GEU || comparison == LTU))))
        {
          comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
          operands[2] = CONST0_RTX (mode);
        }
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
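  /* For instance, "cmp/eq #imm8,r0" is the only immediate-operand compare
     on SH, so an EQ/NE test of r0 against a small constant can keep the
     constant, while anything else is forced into a register (or into the
     scratch register after reload).  */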
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
          || (mode == SImode && operands[2] != CONST0_RTX (SImode)
              && ((comparison != EQ && comparison != NE)
                  || (REG_P (op1) && REGNO (op1) != R0_REG)
                  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
        {
          emit_move_insn (scratch, operands[2]);
          operands[2] = scratch;
        }
      else if (can_create_pseudo_p ())
        operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}

void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
                          gen_rtx_fmt_ee (comparison, SImode,
                                          operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
                           REG_NOTES (jump));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
    /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
       That costs 1 cycle more when the first branch can be predicted taken,
       but saves us mispredicts because only one branch needs prediction.
       It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
        {
          emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
          emit_jump_insn (gen_branch_true (operands[3]));
          return true;
        }
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
        {
          /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
           */
          msw_skip_prob = rev_prob;
          if (REG_BR_PROB_BASE <= 65535)
            lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
          else
            {
              gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
              lsw_taken_prob
                = (prob
                   ? (REG_BR_PROB_BASE
                      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
                         / ((HOST_WIDEST_INT) prob << 32)))
                   : 0);
            }
        }
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
        {
          emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
          emit_jump_insn (gen_branch_false (operands[3]));
          return true;
        }
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
        break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
        msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
        msw_taken = comparison;
      else
        {
          msw_taken = comparison == GE ? GT : GTU;
          msw_skip = swap_condition (msw_taken);
          lsw_taken = GEU;
        }
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
        break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
        msw_taken = comparison;
      else
        {
          lsw_taken = LEU;
          if (comparison == LE)
            msw_taken = LT;
          else if (op2h != CONST0_RTX (SImode))
            msw_taken = LTU;
          else
            break;
          msw_skip = swap_condition (msw_taken);
        }
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
                  + (msw_skip != CODE_FOR_nothing)
                  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
          && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
          && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
        {
          msw_taken_prob = prob / 2U;
          msw_skip_prob
            = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
          lsw_taken_prob = prob;
        }
      else
        {
          msw_taken_prob = prob;
          msw_skip_prob = REG_BR_PROB_BASE;
          /* ??? If we have a constant op2h, should we use that when
             calculating lsw_taken_prob?  */
          lsw_taken_prob = prob;
        }
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
          && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
        operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
                gen_rtx_SET (VOIDmode, t_reg,
                             gen_rtx_fmt_ee (code, SImode,
                                             sh_compare_op0, sh_compare_op1)),
                gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
                            gen_rtx_fmt_ee (code, SImode,
                                            sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx_fmt_ee (code, SImode,
                                        sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
                      gen_rtvec (2, insn,
                                 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
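
/* For example, for an overlapping register-pair move such as mov.d r1,r2
   (r1/r2 into r2/r3), the high-numbered half must be copied first, while
   for mov.d r1,r0 the low half must go first; the REGNO comparison below
   picks the safe order.  */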
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
                   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
      else
        return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
      else
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
        {
        case REG:
          ptrreg = REGNO (inside);
          break;

        case SUBREG:
          ptrreg = subreg_regno (inside);
          break;

        case PLUS:
          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? A r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
          break;

        case LABEL_REF:
          return "mov.l %1,%0\n\tmov.l %1+4,%T0";
        case POST_INC:
          return "mov.l %1,%0\n\tmov.l %1,%T0";
        default:
          gcc_unreachable ();
        }

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
        return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
        {
          if (TARGET_SH2)
            jump = "mov.l %O0,%1; braf %1";
          else
            jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
        }
      else
        jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
      else
        output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
      if (TARGET_SH5)
        output_asm_insn ("lds r13, macl", 0);
      else
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
        output_asm_insn ("sts macl, r13", 0);
      else
        output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
        this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
        {
          int label = lf++;
          /* The call to print_slot will clobber the operands.  */
          rtx op0 = operands[0];

          /* If the instruction in the delay slot is annulled (true), then
             there is no delay slot where we can put it now.  The only safe
             place for it is after the label.  final will do that by default.  */

          if (final_sequence
              && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
              && get_attr_length (XVECEXP (final_sequence, 0, 1)))
            {
              asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                           ASSEMBLER_DIALECT ? "/" : ".", label);
              print_slot (final_sequence);
            }
          else
            asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

          output_asm_insn ("bra\t%l0", &op0);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
         extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX && final_sequence)
        {
          int label = lf++;

          gcc_assert (!final_sequence
                      || !(INSN_ANNULLED_BRANCH_P
                           (XVECEXP (final_sequence, 0, 0))));
          asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
                       logic ? "f" : "t",
                       ASSEMBLER_DIALECT ? "/" : ".", label);
          fprintf (asm_out_file, "\tnop\n");
          output_asm_insn ("bra\t%l0", operands);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, fall through.  */
    case 4:
      {
        char buffer[10];

        sprintf (buffer, "b%s%ss\t%%l0",
                 logic ? "t" : "f",
                 ASSEMBLER_DIALECT ? "/" : ".");
        output_asm_insn (buffer, &operands[0]);
        return "";
      }

    default:
      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
2088 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2089 fill in operands[9] as a label to the successor insn.
2090 We try to use jump threading where possible.
2091 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2092 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2093 follow jmp and bt, if the address is in range. */
2095 output_branchy_insn (enum rtx_code code, const char *template,
2096 rtx insn, rtx *operands)
2098 rtx next_insn = NEXT_INSN (insn);
2100 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2102 rtx src = SET_SRC (PATTERN (next_insn));
2103 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2105 /* Following branch not taken */
2106 operands[9] = gen_label_rtx ();
2107 emit_label_after (operands[9], next_insn);
2108 INSN_ADDRESSES_NEW (operands[9],
2109 INSN_ADDRESSES (INSN_UID (next_insn))
2110 + get_attr_length (next_insn));
2115 int offset = (branch_dest (next_insn)
2116 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
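/* (Illustrative aside, not in the original: a bt/bf displacement is an
   8-bit signed count of 2-byte units, taken relative to the branch
   address + 4, so the reachable offsets from the branch itself are
   [-256, +254] + 4 == [-252, +258] -- exactly the bounds tested below.) */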
2117 if (offset >= -252 && offset <= 258)
2119 if (GET_CODE (src) == IF_THEN_ELSE)
2121 src = XEXP (src, 1);
2127 operands[9] = gen_label_rtx ();
2128 emit_label_after (operands[9], insn);
2129 INSN_ADDRESSES_NEW (operands[9],
2130 INSN_ADDRESSES (INSN_UID (insn))
2131 + get_attr_length (insn));
2136 output_ieee_ccmpeq (rtx insn, rtx *operands)
2138 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2142 /* Output the start of the assembler file. */
2145 sh_file_start (void)
2147 default_file_start ();
2150 /* Declare the .directive section before it is used. */
2151 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2152 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2156 /* We need to show the text section with the proper
2157 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2158 emits it without attributes, else GAS
2159 will complain. We can teach GAS specifically about the
2160 default attributes for our choice of text section, but
2161 then we would have to change GAS again if/when we change
2162 the text section name. */
2163 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2165 /* Switch to the data section so that the coffsem symbol
2166 isn't in the text section. */
2167 switch_to_section (data_section);
2169 if (TARGET_LITTLE_ENDIAN)
2170 fputs ("\t.little\n", asm_out_file);
2174 if (TARGET_SHCOMPACT)
2175 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2176 else if (TARGET_SHMEDIA)
2177 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2178 TARGET_SHMEDIA64 ? 64 : 32);
2182 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2185 unspec_caller_rtx_p (rtx pat)
2187 switch (GET_CODE (pat))
2190 return unspec_caller_rtx_p (XEXP (pat, 0));
2193 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2195 return unspec_caller_rtx_p (XEXP (pat, 1));
2197 if (XINT (pat, 1) == UNSPEC_CALLER)
2206 /* Indicate that INSN cannot be duplicated. This is true for an insn
2207 that generates a unique label. */
2210 sh_cannot_copy_insn_p (rtx insn)
2214 if (!reload_completed || !flag_pic)
2217 if (GET_CODE (insn) != INSN)
2219 if (asm_noperands (insn) >= 0)
2222 pat = PATTERN (insn);
2223 if (GET_CODE (pat) != SET)
2225 pat = SET_SRC (pat);
2227 if (unspec_caller_rtx_p (pat))
2233 /* Actual number of instructions used to make a shift by N. */
2234 static const char ashiftrt_insns[] =
2235 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2237 /* Left shift and logical right shift are the same. */
2238 static const char shift_insns[] =
2239 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2241 /* Individual shift amounts needed to get the above length sequences.
2242 One bit right shifts clobber the T bit, so when possible, put one bit
2243 shifts in the middle of the sequence, so the ends are eligible for
2244 branch delay slots. */
2245 static const short shift_amounts[32][5] = {
2246 {0}, {1}, {2}, {2, 1},
2247 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2248 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2249 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2250 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2251 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2252 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2253 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2255 /* Likewise, but for shift amounts < 16, up to three highmost bits
2256 might be clobbered. This is typically used when combined with some
2257 kind of sign or zero extension. */
2259 static const char ext_shift_insns[] =
2260 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2262 static const short ext_shift_amounts[32][4] = {
2263 {0}, {1}, {2}, {2, 1},
2264 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2265 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2266 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2267 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2268 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2269 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2270 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
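/* As an illustrative aside (not part of the original file): applying one
   row of the tables above must reproduce a plain shift.  The sketch below
   shows the intended reading of the rows; apply_shift_row is a
   hypothetical helper, kept under #if 0 so it is never compiled.  */
#if 0
static unsigned int
apply_shift_row (unsigned int x, const short *row, int len)
{
  int i;
  /* A zero entry terminates a row; positive entries are left shifts,
     negative entries logical right shifts, as gen_ashift interprets
     them further below.  */
  for (i = 0; i < len && row[i] != 0; i++)
    x = row[i] > 0 ? x << row[i] : x >> -row[i];
  return x;
}
/* E.g. shift_amounts[15] is {8, -1, 8}: ((x << 8) >> 1) << 8 produces the
   same 32-bit result as x << 15, using only shll8 / shlr / shll8.  */
#endif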
2272 /* Assuming we have a value that has been sign-extended by at least one bit,
2273 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2274 to shift it by N without data loss, and quicker than by other means? */
2275 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
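/* A quick sanity check of the predicate (illustrative only, never
   compiled): (n | 8) == 15 holds exactly for n == 7 and n == 15, the two
   counts whose ext_shift_amounts rows end in a one-bit right shift that
   can be turned into an arithmetic shift.  */
#if 0
#include <assert.h>
static void
ext_shift_signed_selftest (void)
{
  int n;
  for (n = 0; n < 32; n++)
    assert ((EXT_SHIFT_SIGNED (n) != 0) == (n == 7 || n == 15));
}
#endif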
2277 /* This is used in length attributes in sh.md to help compute the length
2278 of arbitrary constant shift instructions. */
2281 shift_insns_rtx (rtx insn)
2283 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2284 int shift_count = INTVAL (XEXP (set_src, 1));
2285 enum rtx_code shift_code = GET_CODE (set_src);
2290 return ashiftrt_insns[shift_count];
2293 return shift_insns[shift_count];
2299 /* Return the cost of a shift. */
2309 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2311 if (GET_MODE (x) == DImode
2312 && GET_CODE (XEXP (x, 1)) == CONST_INT
2313 && INTVAL (XEXP (x, 1)) == 1)
2316 /* Everything else is invalid, because there is no pattern for it. */
2319 /* If shift by a non constant, then this will be expensive. */
2320 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2321 return SH_DYNAMIC_SHIFT_COST;
2323 value = INTVAL (XEXP (x, 1));
2325 /* Otherwise, return the true cost in instructions. */
2326 if (GET_CODE (x) == ASHIFTRT)
2328 int cost = ashiftrt_insns[value];
2329 /* If SH3, then we put the constant in a reg and use shad. */
2330 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2331 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2335 return shift_insns[value];
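/* Worked example (illustrative, not from the original): a constant left
   shift by 10 costs shift_insns[10] == 2 (shifts by 8 and 2, per
   shift_amounts[10] above), while an arithmetic right shift by 10 has
   ashiftrt_insns[10] == 8 and is therefore capped to
   1 + SH_DYNAMIC_SHIFT_COST when a dynamic shift is available.  */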
2338 /* Return the cost of an AND operation. */
2345 /* Anding with a register is a single-cycle AND instruction. */
2346 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2349 i = INTVAL (XEXP (x, 1));
2353 if (satisfies_constraint_I10 (XEXP (x, 1))
2354 || satisfies_constraint_J16 (XEXP (x, 1)))
2357 return 1 + rtx_cost (XEXP (x, 1), AND);
2360 /* These constants are single cycle extu.[bw] instructions. */
2361 if (i == 0xff || i == 0xffff)
2363 /* Constants that can be used in an and immediate instruction in a single
2364 cycle, but this requires r0, so make it a little more expensive. */
2365 if (CONST_OK_FOR_K08 (i))
2367 /* Constants that can be loaded with a mov immediate and an and.
2368 This case is probably unnecessary. */
2369 if (CONST_OK_FOR_I08 (i))
2371 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2372 This case is probably unnecessary. */
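/* Worked example (illustrative, assuming the cost escalation the comments
   above describe): (and:SI r4 0xffff) is a single extu.w; a K08 mask such
   as 0x7f fits "and #imm,r0" but ties up r0, so it is costed a little
   higher; any other mask pays for a constant load first.  */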
2376 /* Return the cost of an addition or a subtraction. */
2381 /* Adding a register is a single cycle insn. */
2382 if (GET_CODE (XEXP (x, 1)) == REG
2383 || GET_CODE (XEXP (x, 1)) == SUBREG)
2386 /* Likewise for small constants. */
2387 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2388 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2392 switch (GET_CODE (XEXP (x, 1)))
2397 return TARGET_SHMEDIA64 ? 5 : 3;
2400 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2402 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2404 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2412 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2417 /* Return the cost of a multiply. */
2419 multcosts (rtx x ATTRIBUTE_UNUSED)
2421 if (sh_multcost >= 0)
2424 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2425 accept constants. Ideally, we would use a cost of one or two and
2426 add the cost of the operand, but disregard the latter when inside loops
2427 and loop invariant code motion is still to follow.
2428 Using a multiply first and splitting it later if it's a loss
2429 doesn't work because of different sign / zero extension semantics
2430 of multiplies vs. shifts. */
2431 return TARGET_SMALLCODE ? 2 : 3;
2435 /* We have a mul insn, so we can never take more than the mul and the
2436 read of the mac reg, but count more because of the latency and extra reg usage. */
2438 if (TARGET_SMALLCODE)
2443 /* If we're aiming at small code, then just count the number of
2444 insns in a multiply call sequence. */
2445 if (TARGET_SMALLCODE)
2448 /* Otherwise count all the insns in the routine we'd be calling too. */
2452 /* Compute a (partial) cost for rtx X. Return true if the complete
2453 cost has been computed, and false if subexpressions should be
2454 scanned. In either case, *TOTAL contains the cost result. */
2457 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2464 if (INTVAL (x) == 0)
2466 else if (outer_code == AND && and_operand ((x), DImode))
2468 else if ((outer_code == IOR || outer_code == XOR
2469 || outer_code == PLUS)
2470 && CONST_OK_FOR_I10 (INTVAL (x)))
2472 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2473 *total = COSTS_N_INSNS (outer_code != SET);
2474 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2475 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2476 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2477 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2479 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2482 if (CONST_OK_FOR_I08 (INTVAL (x)))
2484 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2485 && CONST_OK_FOR_K08 (INTVAL (x)))
2487 /* prepare_cmp_insn will force costly constants into registers before
2488 the cbranch[sd]i4 patterns can see them, so preserve potentially
2489 interesting ones not covered by I08 above. */
2490 else if (outer_code == COMPARE
2491 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2492 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2493 || INTVAL (x) == 0x7fffffff
2494 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2503 if (TARGET_SHMEDIA64)
2504 *total = COSTS_N_INSNS (4);
2505 else if (TARGET_SHMEDIA32)
2506 *total = COSTS_N_INSNS (2);
2513 *total = COSTS_N_INSNS (4);
2514 /* prepare_cmp_insn will force costly constants into registers before
2515 the cbranchdi4 pattern can see them, so preserve potentially
2516 interesting ones. */
2517 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2523 if (x == CONST0_RTX (GET_MODE (x)))
2525 else if (sh_1el_vec (x, VOIDmode))
2526 *total = outer_code != SET;
2527 if (sh_rep_vec (x, VOIDmode))
2528 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2529 + (outer_code != SET));
2530 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2535 *total = COSTS_N_INSNS (addsubcosts (x));
2539 *total = COSTS_N_INSNS (andcosts (x));
2543 *total = COSTS_N_INSNS (multcosts (x));
2549 *total = COSTS_N_INSNS (shiftcosts (x));
2556 *total = COSTS_N_INSNS (20);
2560 if (sh_1el_vec (x, VOIDmode))
2561 *total = outer_code != SET;
2562 if (sh_rep_vec (x, VOIDmode))
2563 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2564 + (outer_code != SET));
2565 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2578 /* Compute the cost of an address. For the SH, all valid addresses are
2579 the same cost. Use a slightly higher cost for reg + reg addressing,
2580 since it increases pressure on r0. */
2583 sh_address_cost (rtx X)
2585 return (GET_CODE (X) == PLUS
2586 && ! CONSTANT_P (XEXP (X, 1))
2587 && ! TARGET_SHMEDIA ? 1 : 0);
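/* For instance (illustrative): an indexed address like @(r0,r4) is a PLUS
   with a non-constant second operand and is costed 1, while @r4 and a
   displacement form like @(4,r4) are costed 0, steering address selection
   away from the r0-hungry reg + reg mode.  */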
2590 /* Code to expand a shift. */
2593 gen_ashift (int type, int n, rtx reg)
2595 /* Negative values here come from the shift_amounts array. */
2608 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2612 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2614 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2617 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2622 /* Same for HImode */
2625 gen_ashift_hi (int type, int n, rtx reg)
2627 /* Negative values here come from the shift_amounts array. */
2641 /* We don't have HImode right shift operations because using the
2642 ordinary 32 bit shift instructions for that doesn't generate proper
2643 zero/sign extension.
2644 gen_ashift_hi is only called in contexts where we know that the
2645 sign extension works out correctly. */
2648 if (GET_CODE (reg) == SUBREG)
2650 offset = SUBREG_BYTE (reg);
2651 reg = SUBREG_REG (reg);
2653 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2657 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2662 /* Output RTL to split a constant shift into its component SH constant
2663 shift instructions. */
2666 gen_shifty_op (int code, rtx *operands)
2668 int value = INTVAL (operands[2]);
2671 /* Truncate the shift count in case it is out of bounds. */
2672 value = value & 0x1f;
2676 if (code == LSHIFTRT)
2678 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2679 emit_insn (gen_movt (operands[0]));
2682 else if (code == ASHIFT)
2684 /* There is a two instruction sequence for 31 bit left shifts,
2685 but it requires r0. */
2686 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2688 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2689 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2694 else if (value == 0)
2696 /* This can happen even when optimizing, if there were subregs before
2697 reload. Don't output a nop here, as this is never optimized away;
2698 use a no-op move instead. */
2699 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2703 max = shift_insns[value];
2704 for (i = 0; i < max; i++)
2705 gen_ashift (code, shift_amounts[value][i], operands[0]);
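/* E.g. (illustrative): for a left shift by 13 the loop above emits four
   insns -- shifts by 8, 2, 1 and 2, per shift_amounts[13] == {8, 2, 1, 2} --
   with the one-bit shift kept in the middle so both ends of the sequence
   remain eligible for branch delay slots, as noted above the table.  */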
2708 /* Same as above, but optimized for values where the topmost bits don't matter. */
2712 gen_shifty_hi_op (int code, rtx *operands)
2714 int value = INTVAL (operands[2]);
2716 void (*gen_fun) (int, int, rtx);
2718 /* This operation is used by and_shl for SImode values with a few
2719 high bits known to be cleared. */
2723 emit_insn (gen_nop ());
2727 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2730 max = ext_shift_insns[value];
2731 for (i = 0; i < max; i++)
2732 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2735 /* When shifting right, emit the shifts in reverse order, so that
2736 solitary negative values come first. */
2737 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2738 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2741 /* Output RTL for an arithmetic right shift. */
2743 /* ??? Rewrite to use super-optimizer sequences. */
2746 expand_ashiftrt (rtx *operands)
2754 if (GET_CODE (operands[2]) != CONST_INT)
2756 rtx count = copy_to_mode_reg (SImode, operands[2]);
2757 emit_insn (gen_negsi2 (count, count));
2758 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2761 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2762 > 1 + SH_DYNAMIC_SHIFT_COST)
2765 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2766 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2770 if (GET_CODE (operands[2]) != CONST_INT)
2773 value = INTVAL (operands[2]) & 31;
2777 /* If we are called from abs expansion, arrange things so that we
2778 can use a single MT instruction that doesn't clobber the source,
2779 if LICM can hoist out the load of the constant zero. */
2780 if (currently_expanding_to_rtl)
2782 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2784 emit_insn (gen_mov_neg_si_t (operands[0]));
2787 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2790 else if (value >= 16 && value <= 19)
2792 wrk = gen_reg_rtx (SImode);
2793 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2796 gen_ashift (ASHIFTRT, 1, wrk);
2797 emit_move_insn (operands[0], wrk);
2800 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2801 else if (value <= 5)
2803 wrk = gen_reg_rtx (SImode);
2804 emit_move_insn (wrk, operands[1]);
2806 gen_ashift (ASHIFTRT, 1, wrk);
2807 emit_move_insn (operands[0], wrk);
2811 wrk = gen_reg_rtx (Pmode);
2813 /* Load the value into an arg reg and call a helper. */
2814 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2815 sprintf (func, "__ashiftrt_r4_%d", value);
2816 function_symbol (wrk, func, SFUNC_STATIC);
2817 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2818 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2823 sh_dynamicalize_shift_p (rtx count)
2825 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
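/* E.g. (illustrative, assuming SH_DYNAMIC_SHIFT_COST == 1 on an SH3-like
   target): a constant shift by 6 needs shift_insns[6] == 3 insns, so it is
   worth loading the count and using a dynamic shift instead, whereas a
   shift by 9 (shift_insns[9] == 2) stays as two constant-shift insns.  */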
2828 /* Try to find a good way to implement the combiner pattern
2829 [(set (match_operand:SI 0 "register_operand" "r")
2830 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2831 (match_operand:SI 2 "const_int_operand" "n"))
2832 (match_operand:SI 3 "const_int_operand" "n"))).
2833 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2834 return 0 for simple right / left or left/right shift combination.
2835 return 1 for a combination of shifts with zero_extend.
2836 return 2 for a combination of shifts with an AND that needs r0.
2837 return 3 for a combination of shifts with an AND that needs an extra
2838 scratch register, when the three highmost bits of the AND mask are clear.
2839 return 4 for a combination of shifts with an AND that needs an extra
2840 scratch register, when any of the three highmost bits of the AND mask is set.
2842 If ATTRP is set, store an initial right shift width in ATTRP[0],
2843 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2845 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2846 shift_amounts for the last shift value that is to be used before the sign extend. */
2849 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2851 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2852 int left = INTVAL (left_rtx), right;
2854 int cost, best_cost = 10000;
2855 int best_right = 0, best_len = 0;
2859 if (left < 0 || left > 31)
2861 if (GET_CODE (mask_rtx) == CONST_INT)
2862 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2864 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2865 /* Can this be expressed as a right shift / left shift pair? */
2866 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2867 right = exact_log2 (lsb);
2868 mask2 = ~(mask + lsb - 1);
2869 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
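  /* Worked example (illustrative): for mask == 0x0ff0, mask - 1 == 0x0fef
     and mask ^ (mask - 1) == 0x001f, so lsb == 0x0010 -- the lowest set
     bit -- and right == 4.  Then mask2 == ~0x0fff, whose lowest set bit
     lsb2 == 0x1000 marks the first zero above the run of ones.  */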
2870 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2872 best_cost = shift_insns[right] + shift_insns[right + left];
2873 /* mask has no trailing zeroes <==> ! right */
2874 else if (! right && mask2 == ~(lsb2 - 1))
2876 int late_right = exact_log2 (lsb2);
2877 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2879 /* Try to use zero extend. */
2880 if (mask2 == ~(lsb2 - 1))
2884 for (width = 8; width <= 16; width += 8)
2886 /* Can we zero-extend right away? */
2887 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2890 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2891 if (cost < best_cost)
2902 /* ??? Could try to put zero extend into initial right shift,
2903 or even shift a bit left before the right shift. */
2904 /* Determine value of first part of left shift, to get to the
2905 zero extend cut-off point. */
2906 first = width - exact_log2 (lsb2) + right;
2907 if (first >= 0 && right + left - first >= 0)
2909 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2910 + ext_shift_insns[right + left - first];
2911 if (cost < best_cost)
2923 /* Try to use r0 AND pattern */
2924 for (i = 0; i <= 2; i++)
2928 if (! CONST_OK_FOR_K08 (mask >> i))
2930 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2931 if (cost < best_cost)
2936 best_len = cost - 1;
2939 /* Try to use a scratch register to hold the AND operand. */
2940 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2941 for (i = 0; i <= 2; i++)
2945 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2946 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2947 if (cost < best_cost)
2952 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2958 attrp[0] = best_right;
2959 attrp[1] = best_len;
2964 /* This is used in length attributes of the unnamed instructions
2965 corresponding to shl_and_kind return values of 1 and 2. */
2967 shl_and_length (rtx insn)
2969 rtx set_src, left_rtx, mask_rtx;
2972 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2973 left_rtx = XEXP (XEXP (set_src, 0), 1);
2974 mask_rtx = XEXP (set_src, 1);
2975 shl_and_kind (left_rtx, mask_rtx, attributes);
2976 return attributes[1];
2979 /* This is used in length attribute of the and_shl_scratch instruction. */
2982 shl_and_scr_length (rtx insn)
2984 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2985 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2986 rtx op = XEXP (set_src, 0);
2987 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2988 op = XEXP (XEXP (op, 0), 0);
2989 return len + shift_insns[INTVAL (XEXP (op, 1))];
2992 /* Generate rtl for instructions for which shl_and_kind advised a particular
2993 method of generating them, i.e. returned zero. */
2996 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2999 unsigned HOST_WIDE_INT mask;
3000 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3001 int right, total_shift;
3002 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3004 right = attributes[0];
3005 total_shift = INTVAL (left_rtx) + right;
3006 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3013 int first = attributes[2];
3018 emit_insn ((mask << right) <= 0xff
3019 ? gen_zero_extendqisi2 (dest,
3020 gen_lowpart (QImode, source))
3021 : gen_zero_extendhisi2 (dest,
3022 gen_lowpart (HImode, source)));
3026 emit_insn (gen_movsi (dest, source));
3030 operands[2] = GEN_INT (right);
3031 gen_shifty_hi_op (LSHIFTRT, operands);
3035 operands[2] = GEN_INT (first);
3036 gen_shifty_hi_op (ASHIFT, operands);
3037 total_shift -= first;
3041 emit_insn (mask <= 0xff
3042 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3043 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3044 if (total_shift > 0)
3046 operands[2] = GEN_INT (total_shift);
3047 gen_shifty_hi_op (ASHIFT, operands);
3052 shift_gen_fun = gen_shifty_op;
3054 /* If the topmost bit that matters is set, set the topmost bits
3055 that don't matter. This way, we might be able to get a shorter signed constant. */
3057 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3058 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3060 /* Don't expand fine-grained when combining, because that will
3061 make the pattern fail. */
3062 if (currently_expanding_to_rtl
3063 || reload_in_progress || reload_completed)
3067 /* Cases 3 and 4 should be handled by this split
3068 only while combining */
3069 gcc_assert (kind <= 2);
3072 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3075 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3080 operands[2] = GEN_INT (total_shift);
3081 shift_gen_fun (ASHIFT, operands);
3088 if (kind != 4 && total_shift < 16)
3090 neg = -ext_shift_amounts[total_shift][1];
3092 neg -= ext_shift_amounts[total_shift][2];
3096 emit_insn (gen_and_shl_scratch (dest, source,
3099 GEN_INT (total_shift + neg),
3101 emit_insn (gen_movsi (dest, dest));
3108 /* Try to find a good way to implement the combiner pattern
3109 [(set (match_operand:SI 0 "register_operand" "=r")
3110 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3111 (match_operand:SI 2 "const_int_operand" "n")
3112 (match_operand:SI 3 "const_int_operand" "n")
3114 (clobber (reg:SI T_REG))]
3115 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3116 return 0 for simple left / right shift combination.
3117 return 1 for left shift / 8 bit sign extend / left shift.
3118 return 2 for left shift / 16 bit sign extend / left shift.
3119 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3120 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3121 return 5 for left shift / 16 bit sign extend / right shift
3122 return 6 for < 8 bit sign extend / left shift.
3123 return 7 for < 8 bit sign extend / left shift / single right shift.
3124 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3127 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3129 int left, size, insize, ext;
3130 int cost = 0, best_cost;
3133 left = INTVAL (left_rtx);
3134 size = INTVAL (size_rtx);
3135 insize = size - left;
3136 gcc_assert (insize > 0);
3137 /* Default to left / right shift. */
3139 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3142 /* 16 bit shift / sign extend / 16 bit shift */
3143 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3144 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3145 below, by alternative 3 or something even better. */
3146 if (cost < best_cost)
3152 /* Try a plain sign extend between two shifts. */
3153 for (ext = 16; ext >= insize; ext -= 8)
3157 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3158 if (cost < best_cost)
3160 kind = ext / (unsigned) 8;
3164 /* Check if we can do a sloppy shift with a final signed shift
3165 restoring the sign. */
3166 if (EXT_SHIFT_SIGNED (size - ext))
3167 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3168 /* If not, maybe it's still cheaper to do the second shift sloppy,
3169 and do a final sign extend? */
3170 else if (size <= 16)
3171 cost = ext_shift_insns[ext - insize] + 1
3172 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3175 if (cost < best_cost)
3177 kind = ext / (unsigned) 8 + 2;
3181 /* Check if we can sign extend in r0. */
3184 cost = 3 + shift_insns[left];
3185 if (cost < best_cost)
3190 /* Try the same with a final signed shift. */
3193 cost = 3 + ext_shift_insns[left + 1] + 1;
3194 if (cost < best_cost)
3203 /* Try to use a dynamic shift. */
3204 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3205 if (cost < best_cost)
3216 /* Function to be used in the length attribute of the instructions
3217 implementing this pattern. */
3220 shl_sext_length (rtx insn)
3222 rtx set_src, left_rtx, size_rtx;
3225 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3226 left_rtx = XEXP (XEXP (set_src, 0), 1);
3227 size_rtx = XEXP (set_src, 1);
3228 shl_sext_kind (left_rtx, size_rtx, &cost);
3232 /* Generate rtl for this pattern */
3235 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3238 int left, size, insize, cost;
3241 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3242 left = INTVAL (left_rtx);
3243 size = INTVAL (size_rtx);
3244 insize = size - left;
3252 int ext = kind & 1 ? 8 : 16;
3253 int shift2 = size - ext;
3255 /* Don't expand fine-grained when combining, because that will
3256 make the pattern fail. */
3257 if (! currently_expanding_to_rtl
3258 && ! reload_in_progress && ! reload_completed)
3260 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3261 emit_insn (gen_movsi (dest, source));
3265 emit_insn (gen_movsi (dest, source));
3269 operands[2] = GEN_INT (ext - insize);
3270 gen_shifty_hi_op (ASHIFT, operands);
3273 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3274 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3279 operands[2] = GEN_INT (shift2);
3280 gen_shifty_op (ASHIFT, operands);
3287 if (EXT_SHIFT_SIGNED (shift2))
3289 operands[2] = GEN_INT (shift2 + 1);
3290 gen_shifty_op (ASHIFT, operands);
3291 operands[2] = const1_rtx;
3292 gen_shifty_op (ASHIFTRT, operands);
3295 operands[2] = GEN_INT (shift2);
3296 gen_shifty_hi_op (ASHIFT, operands);
3300 operands[2] = GEN_INT (-shift2);
3301 gen_shifty_hi_op (LSHIFTRT, operands);
3303 emit_insn (size <= 8
3304 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3305 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3312 if (! currently_expanding_to_rtl
3313 && ! reload_in_progress && ! reload_completed)
3314 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3318 operands[2] = GEN_INT (16 - insize);
3319 gen_shifty_hi_op (ASHIFT, operands);
3320 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3322 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3324 gen_ashift (ASHIFTRT, 1, dest);
3329 /* Don't expand fine-grained when combining, because that will
3330 make the pattern fail. */
3331 if (! currently_expanding_to_rtl
3332 && ! reload_in_progress && ! reload_completed)
3334 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3335 emit_insn (gen_movsi (dest, source));
3338 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3339 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3340 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
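      /* (Illustrative note, not in the original: the three insns above are
	 the usual masked sign-extension identity -- for an insize-bit field,
	 ((x & ((1 << insize) - 1)) ^ (1 << (insize - 1))) - (1 << (insize - 1))
	 replicates bit insize - 1 into all higher bits.)  */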
3342 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3343 gen_shifty_op (ASHIFT, operands);
3345 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3353 /* Prefix a symbol_ref name with "datalabel". */
3356 gen_datalabel_ref (rtx sym)
3360 if (GET_CODE (sym) == LABEL_REF)
3361 return gen_rtx_CONST (GET_MODE (sym),
3362 gen_rtx_UNSPEC (GET_MODE (sym),
3366 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3368 str = XSTR (sym, 0);
3369 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
3371 str = IDENTIFIER_POINTER (get_identifier (str));
3372 XSTR (sym, 0) = str;
3378 static alloc_pool label_ref_list_pool;
3380 typedef struct label_ref_list_d
3383 struct label_ref_list_d *next;
3384 } *label_ref_list_t;
3386 /* The SH cannot load a large constant into a register, constants have to
3387 come from a pc relative load. The reference of a pc relative load
3388 instruction must be less than 1k in front of the instruction. This
3389 means that we often have to dump a constant inside a function, and
3390 generate code to branch around it.
3392 It is important to minimize this, since the branches will slow things
3393 down and make things bigger.
3395 Worst case code looks like:

mov.l L1,rn
..
mov.l L2,rn
bra L3
nop
align
L1: .long value
L2: .long value

..

mov.l L3,rn
bra L4
nop
align
L3: .long value
L4: .long value
3413 We fix this by performing a scan before scheduling, which notices which
3414 instructions need to have their operands fetched from the constant table
3415 and builds the table.
The algorithm is:

3419 scan, find an instruction which needs a pcrel move. Look forward, find the
3420 last barrier which is within MAX_COUNT bytes of the requirement.
3421 If there isn't one, make one. Process all the instructions between
3422 the find and the barrier.
3424 In the above example, we can tell that L3 is within 1k of L1, so
3425 the first move can be shrunk from the 3 insn+constant sequence into
3426 just 1 insn, and the constant moved to L3 to make:
mov.l L1,rn
..
mov.l L3,rn
bra L4
nop
align
L3: .long value
L4: .long value

3437 Then the second move becomes the target for the shortening process. */
3441 rtx value; /* Value in table. */
3442 rtx label; /* Label of value. */
3443 label_ref_list_t wend; /* End of window. */
3444 enum machine_mode mode; /* Mode of value. */
3446 /* True if this constant is accessed as part of a post-increment
3447 sequence. Note that HImode constants are never accessed in this way. */
3448 bool part_of_sequence_p;
3451 /* The maximum number of constants that can fit into one pool, since
3452 constants in the range 0..510 are at least 2 bytes long, and in the
3453 range from there to 1018 at least 4 bytes. */
3455 #define MAX_POOL_SIZE 372
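/* (Illustrative arithmetic: the comment above bounds a pool at
   510 / 2 == 255 two-byte entries plus (1018 - 510) / 4 == 127 four-byte
   ones, i.e. 382 in the extreme; MAX_POOL_SIZE is presumably kept a
   little below that bound to leave some slack.)  */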
3456 static pool_node pool_vector[MAX_POOL_SIZE];
3457 static int pool_size;
3458 static rtx pool_window_label;
3459 static int pool_window_last;
3461 static int max_labelno_before_reorg;
3463 /* ??? If we need a constant in HImode which is the truncated value of a
3464 constant we need in SImode, we could combine the two entries thus saving
3465 two bytes. Is this common enough to be worth the effort of implementing it? */
3468 /* ??? This stuff should be done at the same time that we shorten branches.
3469 As it is now, we must assume that all branches are the maximum size, and
3470 this causes us to almost always output constant pools sooner than necessary. */
3473 /* Add a constant to the pool and return its label. */
3476 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3480 label_ref_list_t ref, newref;
3482 /* First see if we've already got it. */
3483 for (i = 0; i < pool_size; i++)
3485 if (x->code == pool_vector[i].value->code
3486 && mode == pool_vector[i].mode)
3488 if (x->code == CODE_LABEL)
3490 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3493 if (rtx_equal_p (x, pool_vector[i].value))
3498 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3500 new = gen_label_rtx ();
3501 LABEL_REFS (new) = pool_vector[i].label;
3502 pool_vector[i].label = lab = new;
3504 if (lab && pool_window_label)
3506 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3507 newref->label = pool_window_label;
3508 ref = pool_vector[pool_window_last].wend;
3510 pool_vector[pool_window_last].wend = newref;
3513 pool_window_label = new;
3514 pool_window_last = i;
3520 /* Need a new one. */
3521 pool_vector[pool_size].value = x;
3522 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3525 pool_vector[pool_size - 1].part_of_sequence_p = true;
3528 lab = gen_label_rtx ();
3529 pool_vector[pool_size].mode = mode;
3530 pool_vector[pool_size].label = lab;
3531 pool_vector[pool_size].wend = NULL;
3532 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3533 if (lab && pool_window_label)
3535 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3536 newref->label = pool_window_label;
3537 ref = pool_vector[pool_window_last].wend;
3539 pool_vector[pool_window_last].wend = newref;
3542 pool_window_label = lab;
3543 pool_window_last = pool_size;
3548 /* Output the literal table. START, if nonzero, is the first instruction
3549 this table is needed for, and also indicates that there is at least one
3550 casesi_worker_2 instruction; we have to emit the operand3 labels from
3551 these insns at a 4-byte aligned position. BARRIER is the barrier
3552 after which we are to place the table. */
3555 dump_table (rtx start, rtx barrier)
3561 label_ref_list_t ref;
3564 /* Do two passes, first time dump out the HI sized constants. */
3566 for (i = 0; i < pool_size; i++)
3568 pool_node *p = &pool_vector[i];
3570 if (p->mode == HImode)
3574 scan = emit_insn_after (gen_align_2 (), scan);
3577 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3578 scan = emit_label_after (lab, scan);
3579 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3581 for (ref = p->wend; ref; ref = ref->next)
3584 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3587 else if (p->mode == DFmode)
3595 scan = emit_insn_after (gen_align_4 (), scan);
3597 for (; start != barrier; start = NEXT_INSN (start))
3598 if (GET_CODE (start) == INSN
3599 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3601 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3602 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3604 scan = emit_label_after (lab, scan);
3607 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3609 rtx align_insn = NULL_RTX;
3611 scan = emit_label_after (gen_label_rtx (), scan);
3612 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3615 for (i = 0; i < pool_size; i++)
3617 pool_node *p = &pool_vector[i];
3625 if (align_insn && !p->part_of_sequence_p)
3627 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3628 emit_label_before (lab, align_insn);
3629 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3631 for (ref = p->wend; ref; ref = ref->next)
3634 emit_insn_before (gen_consttable_window_end (lab),
3637 delete_insn (align_insn);
3638 align_insn = NULL_RTX;
3643 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3644 scan = emit_label_after (lab, scan);
3645 scan = emit_insn_after (gen_consttable_4 (p->value,
3647 need_align = ! need_align;
3653 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3658 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3659 scan = emit_label_after (lab, scan);
3660 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3667 if (p->mode != HImode)
3669 for (ref = p->wend; ref; ref = ref->next)
3672 scan = emit_insn_after (gen_consttable_window_end (lab),
3681 for (i = 0; i < pool_size; i++)
3683 pool_node *p = &pool_vector[i];
3694 scan = emit_label_after (gen_label_rtx (), scan);
3695 scan = emit_insn_after (gen_align_4 (), scan);
3697 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3698 scan = emit_label_after (lab, scan);
3699 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3707 scan = emit_label_after (gen_label_rtx (), scan);
3708 scan = emit_insn_after (gen_align_4 (), scan);
3710 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3711 scan = emit_label_after (lab, scan);
3712 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3719 if (p->mode != HImode)
3721 for (ref = p->wend; ref; ref = ref->next)
3724 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3729 scan = emit_insn_after (gen_consttable_end (), scan);
3730 scan = emit_barrier_after (scan);
3732 pool_window_label = NULL_RTX;
3733 pool_window_last = 0;
3736 /* Return nonzero if constant would be an ok source for a
3737 mov.w instead of a mov.l. */
3742 return (GET_CODE (src) == CONST_INT
3743 && INTVAL (src) >= -32768
3744 && INTVAL (src) <= 32767);
3747 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3749 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3751 /* ??? For DImode/DFmode moves, we don't need to fix them if each half of the
3752 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3753 need to fix it if the input value is CONST_OK_FOR_I08. */
3756 broken_move (rtx insn)
3758 if (GET_CODE (insn) == INSN)
3760 rtx pat = PATTERN (insn);
3761 if (GET_CODE (pat) == PARALLEL)
3762 pat = XVECEXP (pat, 0, 0);
3763 if (GET_CODE (pat) == SET
3764 /* We can load any 8-bit value if we don't care what the high
3765 order bits end up as. */
3766 && GET_MODE (SET_DEST (pat)) != QImode
3767 && (CONSTANT_P (SET_SRC (pat))
3768 /* Match mova_const. */
3769 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3770 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3771 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3773 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3774 && (fp_zero_operand (SET_SRC (pat))
3775 || fp_one_operand (SET_SRC (pat)))
3776 /* ??? If this is a -m4 or -m4-single compilation, in general
3777 we don't know the current setting of fpscr, so disable fldi.
3778 There is an exception if this was a register-register move
3779 before reload - and hence it was ascertained that we have
3780 single precision setting - and in a post-reload optimization
3781 we changed this to do a constant load. In that case
3782 we don't have an r0 clobber, hence we must use fldi. */
3783 && (! TARGET_SH4 || TARGET_FMOVD
3784 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3786 && GET_CODE (SET_DEST (pat)) == REG
3787 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3789 && GET_MODE (SET_DEST (pat)) == SImode
3790 && (satisfies_constraint_I20 (SET_SRC (pat))
3791 || satisfies_constraint_I28 (SET_SRC (pat))))
3792 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3802 return (GET_CODE (insn) == INSN
3803 && GET_CODE (PATTERN (insn)) == SET
3804 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3805 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3806 /* Don't match mova_const. */
3807 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3810 /* Fix up a mova from a switch that went out of range. */
3812 fixup_mova (rtx mova)
3814 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3817 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3818 INSN_CODE (mova) = -1;
3823 rtx lab = gen_label_rtx ();
3824 rtx wpat, wpat0, wpat1, wsrc, diff;
3828 worker = NEXT_INSN (worker);
3830 && GET_CODE (worker) != CODE_LABEL
3831 && GET_CODE (worker) != JUMP_INSN);
3832 } while (GET_CODE (worker) == NOTE
3833 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3834 wpat = PATTERN (worker);
3835 wpat0 = XVECEXP (wpat, 0, 0);
3836 wpat1 = XVECEXP (wpat, 0, 1);
3837 wsrc = SET_SRC (wpat0);
3838 PATTERN (worker) = (gen_casesi_worker_2
3839 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3840 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3842 INSN_CODE (worker) = -1;
3843 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3844 gen_rtx_LABEL_REF (Pmode, lab));
3845 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3846 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3847 INSN_CODE (mova) = -1;
3851 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3852 *num_mova, and check if the new mova is not nested within the first one.
3853 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3854 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3856 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3858 int n_addr = 0; /* Initialization to shut up spurious warning. */
3859 int f_target, n_target = 0; /* Likewise. */
3863 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3864 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3865 if (n_addr > n_target || n_addr + 1022 < n_target)
3867 /* Change the mova into a load.
3868 broken_move will then return true for it. */
3869 fixup_mova (new_mova);
3875 *first_mova = new_mova;
3880 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3885 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3886 > n_target - n_addr)
3888 fixup_mova (*first_mova);
3893 fixup_mova (new_mova);
3898 /* Find the last barrier from insn FROM which is close enough to hold the
3899 constant pool. If we can't find one, then create one near the end of the range. */
3903 find_barrier (int num_mova, rtx mova, rtx from)
3912 int leading_mova = num_mova;
3913 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3918 /* For HImode: range is 510, add 4 because pc counts from address of
3919 second instruction after this one, subtract 2 for the jump instruction
3920 that we may need to emit before the table, subtract 2 for the instruction
3921 that fills the jump delay slot (in very rare cases, reorg will take an
3922 instruction from after the constant pool or will leave the delay slot
3923 empty). This gives 510.
3924 For SImode: range is 1020, add 4 because pc counts from address of
3925 second instruction after this one, subtract 2 in case pc is 2 byte
3926 aligned, subtract 2 for the jump instruction that we may need to emit
3927 before the table, subtract 2 for the instruction that fills the jump
3928 delay slot. This gives 1018. */
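/* (Illustrative restatement of the arithmetic above:
   hi_limit = 510 + 4 - 2 - 2 == 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 == 1018.)  */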
3930 /* The branch will always be shortened now that the reference address for
3931 forward branches is the successor address, thus we need no longer make
3932 adjustments to the [sh]i_limit for -O0. */
3937 while (from && count_si < si_limit && count_hi < hi_limit)
3939 int inc = get_attr_length (from);
3942 /* If this is a label that existed at the time of the compute_alignments
3943 call, determine the alignment. N.B. When find_barrier recurses for
3944 an out-of-reach mova, we might see labels at the start of previously
3945 inserted constant tables. */
3946 if (GET_CODE (from) == CODE_LABEL
3947 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3950 new_align = 1 << label_to_alignment (from);
3951 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3952 new_align = 1 << barrier_align (from);
3957 /* In case we are scanning a constant table because of recursion, check
3958 for explicit alignments. If the table is long, we might be forced
3959 to emit the new table in front of it; the length of the alignment
3960 might be the last straw. */
3961 else if (GET_CODE (from) == INSN
3962 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3963 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3964 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3965 /* When we find the end of a constant table, paste the new constant
3966 at the end. That is better than putting it in front because
3967 this way, we don't need extra alignment for adding a 4-byte-aligned
3968 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3969 else if (GET_CODE (from) == INSN
3970 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3971 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3974 if (GET_CODE (from) == BARRIER)
3978 found_barrier = from;
3980 /* If we are at the end of the function, or in front of an alignment
3981 instruction, we need not insert an extra alignment. We prefer
3982 this kind of barrier. */
3983 if (barrier_align (from) > 2)
3984 good_barrier = from;
3986 /* If we are at the end of a hot/cold block, dump the constants here. */
3988 next = NEXT_INSN (from);
3991 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3995 if (broken_move (from))
3998 enum machine_mode mode;
4000 pat = PATTERN (from);
4001 if (GET_CODE (pat) == PARALLEL)
4002 pat = XVECEXP (pat, 0, 0);
4003 src = SET_SRC (pat);
4004 dst = SET_DEST (pat);
4005 mode = GET_MODE (dst);
4007 /* We must explicitly check the mode, because sometimes the
4008 front end will generate code to load unsigned constants into
4009 HImode targets without properly sign extending them. */
4011 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4014 /* We put the short constants before the long constants, so
4015 we must count the length of short constants in the range
4016 for the long constants. */
4017 /* ??? This isn't optimal, but is easy to do. */
4022 /* We dump DF/DI constants before SF/SI ones, because
4023 the limit is the same, but the alignment requirements
4024 are higher. We may waste up to 4 additional bytes
4025 for alignment, and the DF/DI constant may have
4026 another SF/SI constant placed before it. */
4027 if (TARGET_SHCOMPACT
4029 && (mode == DFmode || mode == DImode))
4034 while (si_align > 2 && found_si + si_align - 2 > count_si)
4036 if (found_si > count_si)
4037 count_si = found_si;
4038 found_si += GET_MODE_SIZE (mode);
4040 si_limit -= GET_MODE_SIZE (mode);
4046 switch (untangle_mova (&num_mova, &mova, from))
4048 case 0: return find_barrier (0, 0, mova);
4053 = good_barrier ? good_barrier : found_barrier;
4057 if (found_si > count_si)
4058 count_si = found_si;
4060 else if (GET_CODE (from) == JUMP_INSN
4061 && (GET_CODE (PATTERN (from)) == ADDR_VEC
4062 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
4064 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4066 && (prev_nonnote_insn (from)
4067 == XEXP (MOVA_LABELREF (mova), 0))))
4069 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4071 /* We have just passed the barrier in front of the
4072 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4073 the ADDR_DIFF_VEC is accessed as data, just like our pool
4074 constants, this is a good opportunity to accommodate what
4075 we have gathered so far.
4076 If we waited any longer, we could end up at a barrier in
4077 front of code, which gives worse cache usage for separated
4078 instruction / data caches. */
4079 good_barrier = found_barrier;
4084 rtx body = PATTERN (from);
4085 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4088 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4089 else if (GET_CODE (from) == JUMP_INSN
4091 && ! TARGET_SMALLCODE)
4097 if (new_align > si_align)
4099 si_limit -= (count_si - 1) & (new_align - si_align);
4100 si_align = new_align;
4102 count_si = (count_si + new_align - 1) & -new_align;
4107 if (new_align > hi_align)
4109 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4110 hi_align = new_align;
4112 count_hi = (count_hi + new_align - 1) & -new_align;
4114 from = NEXT_INSN (from);
4121 /* Try as we might, the leading mova is out of range. Change
4122 it into a load (which will become a pcload) and retry. */
4124 return find_barrier (0, 0, mova);
4128 /* Insert the constant pool table before the mova instruction,
4129 to prevent the mova label reference from going out of range. */
4131 good_barrier = found_barrier = barrier_before_mova;
4137 if (good_barrier && next_real_insn (found_barrier))
4138 found_barrier = good_barrier;
4142 /* We didn't find a barrier in time to dump our stuff,
4143 so we'll make one. */
4144 rtx label = gen_label_rtx ();
4146 /* If we exceeded the range, then we must back up over the last
4147 instruction we looked at. Otherwise, we just need to undo the
4148 NEXT_INSN at the end of the loop. */
4149 if (PREV_INSN (from) != orig
4150 && (count_hi > hi_limit || count_si > si_limit))
4151 from = PREV_INSN (PREV_INSN (from));
4153 from = PREV_INSN (from);
4155 /* Walk back to be just before any jump or label.
4156 Putting it before a label reduces the number of times the branch
4157 around the constant pool table will be hit. Putting it before
4158 a jump makes it more likely that the bra delay slot will be filled. */
4160 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4161 || GET_CODE (from) == CODE_LABEL)
4162 from = PREV_INSN (from);
4164 from = emit_jump_insn_after (gen_jump (label), from);
4165 JUMP_LABEL (from) = label;
4166 LABEL_NUSES (label) = 1;
4167 found_barrier = emit_barrier_after (from);
4168 emit_label_after (label, found_barrier);
4171 return found_barrier;
4174 /* If the instruction INSN is implemented by a special function, and we can
4175 positively find the register that is used to call the sfunc, and this
4176 register is not used anywhere else in this instruction - except as the
4177 destination of a set, return this register; else, return 0. */
4179 sfunc_uses_reg (rtx insn)
4182 rtx pattern, part, reg_part, reg;
4184 if (GET_CODE (insn) != INSN)
4186 pattern = PATTERN (insn);
4187 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4190 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4192 part = XVECEXP (pattern, 0, i);
4193 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4198 reg = XEXP (reg_part, 0);
4199 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4201 part = XVECEXP (pattern, 0, i);
4202 if (part == reg_part || GET_CODE (part) == CLOBBER)
4204 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4205 && GET_CODE (SET_DEST (part)) == REG)
4206 ? SET_SRC (part) : part)))
4212 /* See if the only way in which INSN uses REG is by calling it, or by
4213 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
4217 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4223 reg2 = sfunc_uses_reg (insn);
4224 if (reg2 && REGNO (reg2) == REGNO (reg))
4226 pattern = single_set (insn);
4228 && GET_CODE (SET_DEST (pattern)) == REG
4229 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4233 if (GET_CODE (insn) != CALL_INSN)
4235 /* We don't use rtx_equal_p because we don't care if the mode is different. */
4237 pattern = single_set (insn);
4239 && GET_CODE (SET_DEST (pattern)) == REG
4240 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4246 par = PATTERN (insn);
4247 if (GET_CODE (par) == PARALLEL)
4248 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4250 part = XVECEXP (par, 0, i);
4251 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4254 return reg_mentioned_p (reg, SET_SRC (pattern));
4260 pattern = PATTERN (insn);
4262 if (GET_CODE (pattern) == PARALLEL)
4266 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4267 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4269 pattern = XVECEXP (pattern, 0, 0);
4272 if (GET_CODE (pattern) == SET)
4274 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4276 /* We don't use rtx_equal_p, because we don't care if the
4277 mode is different. */
4278 if (GET_CODE (SET_DEST (pattern)) != REG
4279 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4285 pattern = SET_SRC (pattern);
4288 if (GET_CODE (pattern) != CALL
4289 || GET_CODE (XEXP (pattern, 0)) != MEM
4290 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4296 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4297 general registers. Bits 0..15 mean that the respective registers
4298 are used as inputs in the instruction. Bits 16..31 mean that the
4299 registers 0..15, respectively, are used as outputs, or are clobbered.
4300 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4302 regs_used (rtx x, int is_dest)
4310 code = GET_CODE (x);
4315 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4316 << (REGNO (x) + is_dest));
4320 rtx y = SUBREG_REG (x);
4322 if (GET_CODE (y) != REG)
4325 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4327 subreg_regno_offset (REGNO (y),
4330 GET_MODE (x)) + is_dest));
4334 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4336 /* If there was a return value, it must have been indicated with USE. */
4351 fmt = GET_RTX_FORMAT (code);
4353 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4358 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4359 used |= regs_used (XVECEXP (x, i, j), is_dest);
4361 else if (fmt[i] == 'e')
4362 used |= regs_used (XEXP (x, i), is_dest);
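/* Worked example (illustrative, not from the original): for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))), regs_used returns
   0x2000c -- bits 2 and 3 for the two inputs and bit 17 (regno 1 + 16)
   for the written destination.  */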
4367 /* Create an instruction that prevents redirection of a conditional branch
4368 to the destination of the JUMP with address ADDR.
4369 If the branch needs to be implemented as an indirect jump, try to find
4370 a scratch register for it.
4371 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4372 If any preceding insn that doesn't fit into a delay slot is good enough,
4373 pass 1. Pass 2 if a definite blocking insn is needed.
4374 -1 is used internally to avoid deep recursion.
4375 If a blocking instruction is made or recognized, return it. */
4378 gen_block_redirect (rtx jump, int addr, int need_block)
4381 rtx prev = prev_nonnote_insn (jump);
4384 /* First, check if we already have an instruction that satisfies our need. */
4385 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4387 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4389 if (GET_CODE (PATTERN (prev)) == USE
4390 || GET_CODE (PATTERN (prev)) == CLOBBER
4391 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4393 else if ((need_block &= ~1) < 0)
4395 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4398 if (GET_CODE (PATTERN (jump)) == RETURN)
4402 /* Reorg even does nasty things with return insns that cause branches
4403 to go out of range - see find_end_label and callers. */
4404 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4406 /* We can't use JUMP_LABEL here because it might be undefined
4407 when not optimizing. */
4408 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4409 /* If the branch is out of range, try to find a scratch register for it. */
4411 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4415 /* Don't look for the stack pointer as a scratch register; it would
4416 cause trouble if an interrupt occurred. */
4417 unsigned try = 0x7fff, used;
4418 int jump_left = flag_expensive_optimizations + 1;
4420 /* It is likely that the most recent eligible instruction is wanted for
4421 the delay slot. Therefore, find out which registers it uses, and
4422 try to avoid using them. */
4424 for (scan = jump; (scan = PREV_INSN (scan)); )
4428 if (INSN_DELETED_P (scan))
4430 code = GET_CODE (scan);
4431 if (code == CODE_LABEL || code == JUMP_INSN)
4434 && GET_CODE (PATTERN (scan)) != USE
4435 && GET_CODE (PATTERN (scan)) != CLOBBER
4436 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4438 try &= ~regs_used (PATTERN (scan), 0);
4442 for (used = dead = 0, scan = JUMP_LABEL (jump);
4443 (scan = NEXT_INSN (scan)); )
4447 if (INSN_DELETED_P (scan))
4449 code = GET_CODE (scan);
4452 used |= regs_used (PATTERN (scan), 0);
4453 if (code == CALL_INSN)
4454 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4455 dead |= (used >> 16) & ~used;
4461 if (code == JUMP_INSN)
4463 if (jump_left-- && simplejump_p (scan))
4464 scan = JUMP_LABEL (scan);
4470 /* Mask out the stack pointer again, in case it was
4471 the only 'free' register we have found. */
4474 /* If the immediate destination is still in range, check for possible
4475 threading with a jump beyond the delay slot insn.
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation in that case.  */
4479 else if (optimize && need_block >= 0)
4481 rtx next = next_active_insn (next_active_insn (dest));
4482 if (next && GET_CODE (next) == JUMP_INSN
4483 && GET_CODE (PATTERN (next)) == SET
4484 && recog_memoized (next) == CODE_FOR_jump_compact)
4486 dest = JUMP_LABEL (next);
4488 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4490 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4496 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
	  /* It would be nice if we could convert the jump into an indirect
	     jump / far branch right now, and thus expose all constituent
	     instructions to further optimization.  However, reorg uses
	     simplejump_p to determine if there is an unconditional jump where
	     it should try to schedule instructions from the target of the
	     branch; simplejump_p fails for indirect jumps even if they have
	     a JUMP_LABEL.  */
	  rtx insn = emit_insn_before (gen_indirect_jump_scratch
				       (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
				       , jump);
4508 /* ??? We would like this to have the scope of the jump, but that
4509 scope will change when a delay slot insn of an inner scope is added.
4510 Hence, after delay slot scheduling, we'll have to expect
	     NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
	     the jump.  */
4514 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4515 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4518 else if (need_block)
4519 /* We can't use JUMP_LABEL here because it might be undefined
4520 when not optimizing. */
    return emit_insn_before (gen_block_branch_redirect
			     (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
			     , jump);
4527 #define CONDJUMP_MIN -252
4528 #define CONDJUMP_MAX 262
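/* For orientation (an added note, not stated by the original code): an SH
   bt/bf conditional branch has an 8-bit signed displacement counted in
   2-byte units from pc + 4, so it can reach targets in [-252, +258] bytes
   relative to the branch itself; the slightly larger CONDJUMP_MAX
   presumably leaves room for the near label being placed just past the
   branch-around sequence.  */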
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx insert_place;
  /* The ultimate destination.  */
  rtx far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};
4545 static void gen_far_branch (struct far_branch *);
4546 enum mdep_reorg_phase_e mdep_reorg_phase;
static void
gen_far_branch (struct far_branch *bp)
{
4550 rtx insn = bp->insert_place;
4552 rtx label = gen_label_rtx ();
4555 emit_label_after (label, insn);
4558 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4559 LABEL_NUSES (bp->far_label)++;
4562 jump = emit_jump_insn_after (gen_return (), insn);
4563 /* Emit a barrier so that reorg knows that any following instructions
4564 are not reachable via a fall-through path.
4565 But don't do this when not optimizing, since we wouldn't suppress the
4566 alignment for the barrier then, and could end up with out-of-range
4567 pc-relative loads. */
4569 emit_barrier_after (jump);
4570 emit_label_after (bp->near_label, insn);
4571 JUMP_LABEL (jump) = bp->far_label;
4572 ok = invert_jump (insn, label, 1);
  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we would rather hide the
     delay slot) and it could cause branches to go out of range.  */
4581 (gen_stuff_delay_slot
4582 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4583 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4585 /* Prevent reorg from undoing our splits. */
4586 gen_block_redirect (jump, bp->address += 2, 2);
4589 /* Fix up ADDR_DIFF_VECs. */
static void
fixup_addr_diff_vecs (rtx first)
{
  rtx insn;
4595 for (insn = first; insn; insn = NEXT_INSN (insn))
4597 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4599 if (GET_CODE (insn) != JUMP_INSN
4600 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4602 pat = PATTERN (insn);
4603 vec_lab = XEXP (XEXP (pat, 0), 0);
4605 /* Search the matching casesi_jump_2. */
4606 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4608 if (GET_CODE (prev) != JUMP_INSN)
4610 prevpat = PATTERN (prev);
4611 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4613 x = XVECEXP (prevpat, 0, 1);
4614 if (GET_CODE (x) != USE)
4617 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	  /* FIXME: This is a bug in the optimizer, but it seems harmless
	     to just avoid panicking.  */
4625 /* Emit the reference label of the braf where it belongs, right after
4626 the casesi_jump_2 (i.e. braf). */
4627 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4628 emit_label_after (braf_label, prev);
      /* Fix up the ADDR_DIFF_VEC to be relative
4631 to the reference address of the braf. */
4632 XEXP (XEXP (pat, 0), 0) = braf_label;
4636 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4637 a barrier. Return the base 2 logarithm of the desired alignment. */
int
barrier_align (rtx barrier_or_label)
{
4641 rtx next = next_real_insn (barrier_or_label), pat, prev;
4642 int slot, credit, jump_to_next = 0;
4647 pat = PATTERN (next);
4649 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4652 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4653 /* This is a barrier in front of a constant table. */
4656 prev = prev_real_insn (barrier_or_label);
4657 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4659 pat = PATTERN (prev);
4660 /* If this is a very small table, we want to keep the alignment after
4661 the table to the minimum for proper code alignment. */
4662 return ((TARGET_SMALLCODE
4663 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4664 <= (unsigned) 1 << (CACHE_LOG - 2)))
4665 ? 1 << TARGET_SHMEDIA : align_jumps_log);
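      /* Added note on the expression above: TARGET_SHMEDIA evaluates to 0
	 or 1 here, so a small table gets an alignment log of 2 (4-byte
	 alignment) on SHmedia and 1 (2-byte alignment) otherwise.  */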
4668 if (TARGET_SMALLCODE)
4671 if (! TARGET_SH2 || ! optimize)
4672 return align_jumps_log;
4674 /* When fixing up pcloads, a constant table might be inserted just before
4675 the basic block that ends with the barrier. Thus, we can't trust the
4676 instruction lengths before that. */
4677 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
  /* Check if there is an immediately preceding branch to the insn beyond
     the barrier.  We must weigh the cost of discarding useful information
     from the current cache line when executing this branch and there is
     an alignment, against that of fetching unneeded insns in front of the
     branch target when there is no alignment.  */
4685 /* There are two delay_slot cases to consider. One is the simple case
4686 where the preceding branch is to the insn beyond the barrier (simple
4687 delay slot filling), and the other is where the preceding branch has
4688 a delay slot that is a duplicate of the insn after the barrier
4689 (fill_eager_delay_slots) and the branch is to the insn after the insn
4690 after the barrier. */
4692 /* PREV is presumed to be the JUMP_INSN for the barrier under
4693 investigation. Skip to the insn before it. */
4694 prev = prev_real_insn (prev);
4696 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4697 credit >= 0 && prev && GET_CODE (prev) == INSN;
4698 prev = prev_real_insn (prev))
4701 if (GET_CODE (PATTERN (prev)) == USE
4702 || GET_CODE (PATTERN (prev)) == CLOBBER)
4704 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4706 prev = XVECEXP (PATTERN (prev), 0, 1);
4707 if (INSN_UID (prev) == INSN_UID (next))
4709 /* Delay slot was filled with insn at jump target. */
4716 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4718 credit -= get_attr_length (prev);
4721 && GET_CODE (prev) == JUMP_INSN
4722 && JUMP_LABEL (prev))
4726 || next_real_insn (JUMP_LABEL (prev)) == next
4727 /* If relax_delay_slots() decides NEXT was redundant
4728 with some previous instruction, it will have
4729 redirected PREV's jump to the following insn. */
4730 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4731 /* There is no upper bound on redundant instructions
4732 that might have been skipped, but we must not put an
4733 alignment where none had been before. */
4734 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4736 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4737 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4738 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4740 rtx pat = PATTERN (prev);
4741 if (GET_CODE (pat) == PARALLEL)
4742 pat = XVECEXP (pat, 0, 0);
4743 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4749 return align_jumps_log;
4752 /* If we are inside a phony loop, almost any kind of label can turn up as the
4753 first one in the loop. Aligning a braf label causes incorrect switch
4754 destination addresses; we can detect braf labels because they are
4755 followed by a BARRIER.
4756 Applying loop alignment to small constant or switch tables is a waste
4757 of space, so we suppress this too. */
int
sh_loop_align (rtx label)
{
  rtx next = label;
  do
    next = next_nonnote_insn (next);
  while (next && GET_CODE (next) == CODE_LABEL);
  if (! next
      || ! INSN_P (next)
      || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
      || recog_memoized (next) == CODE_FOR_consttable_2)
    return 0;
  return align_loops_log;
}
/* Do a final pass over the function, just before delayed branch
   scheduling.  */

static void
sh_reorg (void)
{
4782 rtx first, insn, mova = NULL_RTX;
4784 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4785 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4787 first = get_insns ();
4788 max_labelno_before_reorg = max_label_num ();
4790 /* We must split call insns before introducing `mova's. If we're
4791 optimizing, they'll have already been split. Otherwise, make
4792 sure we don't split them too late. */
4794 split_all_insns_noflow ();
  /* If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call.  It does no harm to not generate these
     pseudo-ops.  However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry.  */
4805 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4808 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4809 own purposes. This works because none of the remaining passes
4810 need to look at them.
4812 ??? But it may break in the future. We should use a machine
4813 dependent REG_NOTE, or some other approach entirely. */
4814 for (insn = first; insn; insn = NEXT_INSN (insn))
4820 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4822 remove_note (insn, note);
4826 for (insn = first; insn; insn = NEXT_INSN (insn))
4828 rtx pattern, reg, link, set, scan, dies, label;
4829 int rescan = 0, foundinsn = 0;
4831 if (GET_CODE (insn) == CALL_INSN)
4833 pattern = PATTERN (insn);
4835 if (GET_CODE (pattern) == PARALLEL)
4836 pattern = XVECEXP (pattern, 0, 0);
4837 if (GET_CODE (pattern) == SET)
4838 pattern = SET_SRC (pattern);
4840 if (GET_CODE (pattern) != CALL
4841 || GET_CODE (XEXP (pattern, 0)) != MEM)
4844 reg = XEXP (XEXP (pattern, 0), 0);
4848 reg = sfunc_uses_reg (insn);
4853 if (GET_CODE (reg) != REG)
4856 /* Try scanning backward to find where the register is set. */
4858 for (scan = PREV_INSN (insn);
4859 scan && GET_CODE (scan) != CODE_LABEL;
4860 scan = PREV_INSN (scan))
4862 if (! INSN_P (scan))
4865 if (! reg_mentioned_p (reg, scan))
4868 if (noncall_uses_reg (reg, scan, &set))
4881 /* The register is set at LINK. */
4883 /* We can only optimize the function call if the register is
4884 being set to a symbol. In theory, we could sometimes
4885 optimize calls to a constant location, but the assembler
4886 and linker do not support that at present. */
4887 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4888 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4891 /* Scan forward from LINK to the place where REG dies, and
4892 make sure that the only insns which use REG are
4893 themselves function calls. */
	  /* ??? This doesn't work for call targets that were allocated
	     by reload, since there may not be a REG_DEAD note for the
	     register.  */
4900 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4904 /* Don't try to trace forward past a CODE_LABEL if we haven't
4905 seen INSN yet. Ordinarily, we will only find the setting insn
4906 if it is in the same basic block. However,
4907 cross-jumping can insert code labels in between the load and
4908 the call, and can result in situations where a single call
4909 insn may have two targets depending on where we came from. */
4911 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4914 if (! INSN_P (scan))
4917 /* Don't try to trace forward past a JUMP. To optimize
4918 safely, we would have to check that all the
4919 instructions at the jump destination did not use REG. */
4921 if (GET_CODE (scan) == JUMP_INSN)
4924 if (! reg_mentioned_p (reg, scan))
4927 if (noncall_uses_reg (reg, scan, &scanset))
4934 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4936 /* There is a function call to this register other
4937 than the one we are checking. If we optimize
4938 this call, we need to rescan again below. */
	      /* ??? We shouldn't have to worry about SCANSET here.
		 We should just be able to check for a REG_DEAD note
		 on a function call.  However, the REG_DEAD notes are
		 apparently not dependable around libcalls; c-torture
		 execute/920501-2 is a test case.  If SCANSET is set,
		 then this insn sets the register, so it must have
		 died earlier.  Unfortunately, this will only handle
		 the cases in which the register is, in fact, set in a
		 later insn.  */
4952 /* ??? We shouldn't have to use FOUNDINSN here.
4953 This dates back to when we used LOG_LINKS to find
4954 the most recent insn which sets the register. */
4958 || find_reg_note (scan, REG_DEAD, reg)))
4967 /* Either there was a branch, or some insn used REG
4968 other than as a function call address. */
	  /* Create a code label, and put it in a REG_LABEL_OPERAND note
	     on the insn which sets the register, and on each call insn
	     which uses the register.  In final_prescan_insn we look for
	     the REG_LABEL_OPERAND notes, and output the appropriate label
	     or pseudo-op.  */
4978 label = gen_label_rtx ();
4979 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4981 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4990 scan = NEXT_INSN (scan);
4992 && ((GET_CODE (scan) == CALL_INSN
4993 && reg_mentioned_p (reg, scan))
4994 || ((reg2 = sfunc_uses_reg (scan))
4995 && REGNO (reg2) == REGNO (reg))))
4997 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
5000 while (scan != dies);
5006 fixup_addr_diff_vecs (first);
5010 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5011 shorten_branches (first);
5014 /* Scan the function looking for move instructions which have to be
5015 changed to pc-relative loads and insert the literal tables. */
5016 label_ref_list_pool = create_alloc_pool ("label references list",
5017 sizeof (struct label_ref_list_d),
5019 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5020 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5024 /* ??? basic block reordering can move a switch table dispatch
5025 below the switch table. Check if that has happened.
5026 We only have the addresses available when optimizing; but then,
5027 this check shouldn't be needed when not optimizing. */
5028 if (!untangle_mova (&num_mova, &mova, insn))
5034 else if (GET_CODE (insn) == JUMP_INSN
5035 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5037 /* ??? loop invariant motion can also move a mova out of a
5038 loop. Since loop does this code motion anyway, maybe we
5039 should wrap UNSPEC_MOVA into a CONST, so that reload can
5042 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5043 || (prev_nonnote_insn (insn)
5044 == XEXP (MOVA_LABELREF (mova), 0))))
5051 /* Some code might have been inserted between the mova and
5052 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5053 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5054 total += get_attr_length (scan);
	  /* The range of the mova is 1020; add 4 because the pc counts from
	     the address of the second instruction after this one, and
	     subtract 2 in case the pc is 2-byte aligned.  Possible alignment
	     needed for the ADDR_DIFF_VEC cancels out with the alignment
	     effects of the mova itself.  */
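	  /* Worked out (added note): 1020 + 4 - 2 == 1022, so the TOTAL
	     accumulated above is presumably compared against 1022 by the
	     range check that follows.  */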
5062 /* Change the mova into a load, and restart scanning
5063 there. broken_move will then return true for mova. */
5068 if (broken_move (insn)
5069 || (GET_CODE (insn) == INSN
5070 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
	  /* Scan ahead looking for a barrier to stick the constant table
	     behind.  */
5075 rtx barrier = find_barrier (num_mova, mova, insn);
5076 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5077 int need_aligned_label = 0;
5079 if (num_mova && ! mova_p (mova))
5081 /* find_barrier had to change the first mova into a
5082 pcload; thus, we have to start with this new pcload. */
5086 /* Now find all the moves between the points and modify them. */
5087 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5089 if (GET_CODE (scan) == CODE_LABEL)
5091 if (GET_CODE (scan) == INSN
5092 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5093 need_aligned_label = 1;
5094 if (broken_move (scan))
5096 rtx *patp = &PATTERN (scan), pat = *patp;
5100 enum machine_mode mode;
5102 if (GET_CODE (pat) == PARALLEL)
5103 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5104 src = SET_SRC (pat);
5105 dst = SET_DEST (pat);
5106 mode = GET_MODE (dst);
5108 if (mode == SImode && hi_const (src)
5109 && REGNO (dst) != FPUL_REG)
5114 while (GET_CODE (dst) == SUBREG)
5116 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5117 GET_MODE (SUBREG_REG (dst)),
5120 dst = SUBREG_REG (dst);
5122 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5124 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5126 /* This must be an insn that clobbers r0. */
5127 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5128 XVECLEN (PATTERN (scan), 0)
5130 rtx clobber = *clobberp;
5132 gcc_assert (GET_CODE (clobber) == CLOBBER
5133 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5136 && reg_set_between_p (r0_rtx, last_float_move, scan))
5140 && GET_MODE_SIZE (mode) != 4
5141 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5143 lab = add_constant (src, mode, last_float);
5145 emit_insn_before (gen_mova (lab), scan);
		      /* There will be a REG_UNUSED note for r0 on
			 LAST_FLOAT_MOVE; we have to change it to REG_INC,
			 otherwise reorg's mark_target_live_regs will not
			 consider r0 to be used, and we could end up with a
			 delay slot insn in front of SCAN that clobbers r0.  */
5154 = find_regno_note (last_float_move, REG_UNUSED, 0);
		      /* If we are not optimizing, then there may not be
			 a note.  */
		      if (note)
			PUT_MODE (note, REG_INC);
5161 *last_float_addr = r0_inc_rtx;
5163 last_float_move = scan;
5165 newsrc = gen_const_mem (mode,
5166 (((TARGET_SH4 && ! TARGET_FMOVD)
5167 || REGNO (dst) == FPUL_REG)
5170 last_float_addr = &XEXP (newsrc, 0);
5172 /* Remove the clobber of r0. */
5173 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5174 gen_rtx_SCRATCH (Pmode));
5176 /* This is a mova needing a label. Create it. */
5177 else if (GET_CODE (src) == UNSPEC
5178 && XINT (src, 1) == UNSPEC_MOVA
5179 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5181 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5182 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5183 newsrc = gen_rtx_UNSPEC (SImode,
5184 gen_rtvec (1, newsrc),
5189 lab = add_constant (src, mode, 0);
5190 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5191 newsrc = gen_const_mem (mode, newsrc);
5193 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5194 INSN_CODE (scan) = -1;
5197 dump_table (need_aligned_label ? insn : 0, barrier);
5201 free_alloc_pool (label_ref_list_pool);
5202 for (insn = first; insn; insn = NEXT_INSN (insn))
5203 PUT_MODE (insn, VOIDmode);
5205 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5206 INSN_ADDRESSES_FREE ();
5207 split_branches (first);
5209 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5210 also has an effect on the register that holds the address of the sfunc.
5211 Insert an extra dummy insn in front of each sfunc that pretends to
5212 use this register. */
5213 if (flag_delayed_branch)
5215 for (insn = first; insn; insn = NEXT_INSN (insn))
5217 rtx reg = sfunc_uses_reg (insn);
5221 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5225 /* fpscr is not actually a user variable, but we pretend it is for the
5226 sake of the previous optimization passes, since we want it handled like
5227 one. However, we don't have any debugging information for it, so turn
5228 it into a non-user variable now. */
5230 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5232 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
static int
get_dest_uid (rtx label, int max_uid)
{
  rtx dest = next_real_insn (label);
  int dest_uid;
  if (! dest)
    /* This can happen for an undefined label.  */
    return 0;
  dest_uid = INSN_UID (dest);
5244 /* If this is a newly created branch redirection blocking instruction,
5245 we cannot index the branch_uid or insn_addresses arrays with its
5246 uid. But then, we won't need to, because the actual destination is
5247 the following branch. */
  while (dest_uid >= max_uid)
    {
      dest = NEXT_INSN (dest);
      dest_uid = INSN_UID (dest);
    }
  if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
    return 0;
  return dest_uid;
}
5258 /* Split condbranches that are out of range. Also add clobbers for
5259 scratch registers that are needed in far jumps.
5260 We do this before delay slot scheduling, so that it can take our
5261 newly created instructions into account. It also allows us to
5262 find branches with common targets more easily. */
static void
split_branches (rtx first)
{
  rtx insn;
5268 struct far_branch **uid_branch, *far_branch_list = 0;
5269 int max_uid = get_max_uid ();
5272 /* Find out which branches are out of range. */
5273 shorten_branches (first);
5275 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5276 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5278 for (insn = first; insn; insn = NEXT_INSN (insn))
5279 if (! INSN_P (insn))
5281 else if (INSN_DELETED_P (insn))
5283 /* Shorten_branches would split this instruction again,
5284 so transform it into a note. */
5285 SET_INSN_DELETED (insn);
5287 else if (GET_CODE (insn) == JUMP_INSN
5288 /* Don't mess with ADDR_DIFF_VEC */
5289 && (GET_CODE (PATTERN (insn)) == SET
5290 || GET_CODE (PATTERN (insn)) == RETURN))
5292 enum attr_type type = get_attr_type (insn);
5293 if (type == TYPE_CBRANCH)
5297 if (get_attr_length (insn) > 4)
5299 rtx src = SET_SRC (PATTERN (insn));
5300 rtx olabel = XEXP (XEXP (src, 1), 0);
5301 int addr = INSN_ADDRESSES (INSN_UID (insn));
5303 int dest_uid = get_dest_uid (olabel, max_uid);
5304 struct far_branch *bp = uid_branch[dest_uid];
	      /* redirect_jump needs a valid JUMP_LABEL, and it might delete
		 the label if the LABEL_NUSES count drops to zero.  There is
		 always a jump_optimize pass that sets these values, but it
		 proceeds to delete unreferenced code, and then if not
		 optimizing, to un-delete the deleted instructions, thus
		 leaving labels with too-low use counts.  */
5314 JUMP_LABEL (insn) = olabel;
5315 LABEL_NUSES (olabel)++;
5319 bp = (struct far_branch *) alloca (sizeof *bp);
5320 uid_branch[dest_uid] = bp;
5321 bp->prev = far_branch_list;
5322 far_branch_list = bp;
5324 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5325 LABEL_NUSES (bp->far_label)++;
5329 label = bp->near_label;
5330 if (! label && bp->address - addr >= CONDJUMP_MIN)
5332 rtx block = bp->insert_place;
5334 if (GET_CODE (PATTERN (block)) == RETURN)
5335 block = PREV_INSN (block);
5337 block = gen_block_redirect (block,
5339 label = emit_label_after (gen_label_rtx (),
5341 bp->near_label = label;
5343 else if (label && ! NEXT_INSN (label))
5345 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5346 bp->insert_place = insn;
5348 gen_far_branch (bp);
5352 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5354 bp->near_label = label = gen_label_rtx ();
5355 bp->insert_place = insn;
5358 ok = redirect_jump (insn, label, 0);
5363 /* get_attr_length (insn) == 2 */
5364 /* Check if we have a pattern where reorg wants to redirect
		 the branch to a label from an unconditional branch that
		 is too far away.  */
5367 /* We can't use JUMP_LABEL here because it might be undefined
5368 when not optimizing. */
5369 /* A syntax error might cause beyond to be NULL_RTX. */
5371 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5375 && (GET_CODE (beyond) == JUMP_INSN
5376 || ((beyond = next_active_insn (beyond))
5377 && GET_CODE (beyond) == JUMP_INSN))
5378 && GET_CODE (PATTERN (beyond)) == SET
5379 && recog_memoized (beyond) == CODE_FOR_jump_compact
5381 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5382 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5384 gen_block_redirect (beyond,
5385 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5388 next = next_active_insn (insn);
5390 if ((GET_CODE (next) == JUMP_INSN
5391 || ((next = next_active_insn (next))
5392 && GET_CODE (next) == JUMP_INSN))
5393 && GET_CODE (PATTERN (next)) == SET
5394 && recog_memoized (next) == CODE_FOR_jump_compact
5396 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5397 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5399 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5401 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5403 int addr = INSN_ADDRESSES (INSN_UID (insn));
5406 struct far_branch *bp;
5408 if (type == TYPE_JUMP)
5410 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5411 dest_uid = get_dest_uid (far_label, max_uid);
		  /* Parse errors can lead to labels outside
		     the insn stream.  */
5416 if (! NEXT_INSN (far_label))
5421 JUMP_LABEL (insn) = far_label;
5422 LABEL_NUSES (far_label)++;
5424 redirect_jump (insn, NULL_RTX, 1);
5428 bp = uid_branch[dest_uid];
5431 bp = (struct far_branch *) alloca (sizeof *bp);
5432 uid_branch[dest_uid] = bp;
5433 bp->prev = far_branch_list;
5434 far_branch_list = bp;
5436 bp->far_label = far_label;
5438 LABEL_NUSES (far_label)++;
5440 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5441 if (addr - bp->address <= CONDJUMP_MAX)
5442 emit_label_after (bp->near_label, PREV_INSN (insn));
5445 gen_far_branch (bp);
5451 bp->insert_place = insn;
5453 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5455 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5458 /* Generate all pending far branches,
5459 and free our references to the far labels. */
5460 while (far_branch_list)
5462 if (far_branch_list->near_label
5463 && ! NEXT_INSN (far_branch_list->near_label))
5464 gen_far_branch (far_branch_list);
5466 && far_branch_list->far_label
5467 && ! --LABEL_NUSES (far_branch_list->far_label))
5468 delete_insn (far_branch_list->far_label);
5469 far_branch_list = far_branch_list->prev;
5472 /* Instruction length information is no longer valid due to the new
5473 instructions that have been generated. */
5474 init_insn_lengths ();
/* Dump out instruction addresses, which is useful for debugging the
   constant pool table stuff.

   If relaxing, output the label and pseudo-ops used to link together
   calls and the instructions which set the registers.  */
5483 /* ??? The addresses printed by this routine for insns are nonsense for
5484 insns which are inside of a sequence where none of the inner insns have
5485 variable length. This is because the second pass of shorten_branches
5486 does not bother to update them. */
5489 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5490 int noperands ATTRIBUTE_UNUSED)
5492 if (TARGET_DUMPISIZE)
5493 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
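  /* For illustration (added note): with TARGET_DUMPISIZE this interleaves
     assembler comments such as "! at 004c" with the output, recording the
     byte address assigned to each insn; the address value shown here is
     made up.  */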
5499 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5504 pattern = PATTERN (insn);
5505 if (GET_CODE (pattern) == PARALLEL)
5506 pattern = XVECEXP (pattern, 0, 0);
5507 switch (GET_CODE (pattern))
5510 if (GET_CODE (SET_SRC (pattern)) != CALL
5511 && get_attr_type (insn) != TYPE_SFUNC)
5513 targetm.asm_out.internal_label
5514 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5517 /* else FALLTHROUGH */
5519 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5520 CODE_LABEL_NUMBER (XEXP (note, 0)));
/* Dump out any constants accumulated in the final pass.  These will
   only be labels.  */

const char *
output_jump_label_table (void)
5540 fprintf (asm_out_file, "\t.align 2\n");
5541 for (i = 0; i < pool_size; i++)
5543 pool_node *p = &pool_vector[i];
5545 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5546 CODE_LABEL_NUMBER (p->label));
5547 output_asm_insn (".long %O0", &p->value);
/* A full frame looks like:

   arg-5
   arg-4
   [ if current_function_anonymous_args
   arg-3
   arg-2
   arg-1
   arg-0 ]
   saved-fp
   saved-r10
   saved-r11
   saved-r12
   saved-pr
   local-n
   ..
   local-1
   local-0	<- fp points here.  */
5574 /* Number of bytes pushed for anonymous args, used to pass information
5575 between expand_prologue and expand_epilogue. */
5577 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5578 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5579 for an epilogue and a negative value means that it's for a sibcall
5580 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5581 all the registers that are about to be restored, and hence dead. */
static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
		     HARD_REG_SET *live_regs_mask)
{
5587 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5590 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
      /* This test is bogus, as output_stack_adjust is used to re-align the
	 stack.  */
#if 0
      gcc_assert (!(size % align));
#endif
5598 if (CONST_OK_FOR_ADD (size))
5599 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5600 /* Try to do it with two partial adjustments; however, we must make
5601 sure that the stack is properly aligned at all times, in case
5602 an interrupt occurs between the two partial adjustments. */
5603 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5604 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5606 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5607 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
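      /* Worked instance (added note; assumes the non-SHmedia case, where
	 CONST_OK_FOR_ADD accepts signed 8-bit immediates): size == 184 with
	 align == 4 splits as (184/2 & -4) == 92 plus the remaining 92; each
	 step fits the immediate range and leaves the stack 4-byte aligned,
	 so an interrupt between the two add insns still sees an aligned
	 stack.  */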
5613 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5616 /* If TEMP is invalid, we could temporarily save a general
5617 register to MACL. However, there is currently no need
5618 to handle this case, so just die when we see it. */
	  if (epilogue_p < 0
	      || current_function_interrupt
	      || ! call_really_used_regs[temp] || fixed_regs[temp])
	    temp = -1;
5623 if (temp < 0 && ! current_function_interrupt
5624 && (TARGET_SHMEDIA || epilogue_p >= 0))
5627 COPY_HARD_REG_SET (temps, call_used_reg_set);
5628 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5632 if (crtl->return_rtx)
5634 enum machine_mode mode;
5635 mode = GET_MODE (crtl->return_rtx);
5636 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5637 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5639 for (i = 0; i < nreg; i++)
5640 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5641 if (current_function_calls_eh_return)
5643 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5644 for (i = 0; i <= 3; i++)
5645 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5648 if (TARGET_SHMEDIA && epilogue_p < 0)
5649 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5650 CLEAR_HARD_REG_BIT (temps, i);
5651 if (epilogue_p <= 0)
5653 for (i = FIRST_PARM_REG;
5654 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5655 CLEAR_HARD_REG_BIT (temps, i);
5656 if (cfun->static_chain_decl != NULL)
5657 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5659 temp = scavenge_reg (&temps);
5661 if (temp < 0 && live_regs_mask)
5665 COPY_HARD_REG_SET (temps, *live_regs_mask);
5666 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5667 temp = scavenge_reg (&temps);
5671 rtx adj_reg, tmp_reg, mem;
	      /* If we reached here, the most likely case is the (sibcall)
		 epilogue for non SHmedia.  Put a special push/pop sequence
		 for such a case as the last resort.  This looks lengthy but
		 should not be a problem because it seems to be very
		 rare.  */
5679 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5682 /* ??? There is still the slight possibility that r4 or
5683 r5 have been reserved as fixed registers or assigned
5684 as global registers, and they change during an
5685 interrupt. There are possible ways to handle this:
5687 - If we are adjusting the frame pointer (r14), we can do
5688 with a single temp register and an ordinary push / pop
5690 - Grab any call-used or call-saved registers (i.e. not
5691 fixed or globals) for the temps we need. We might
5692 also grab r14 if we are adjusting the stack pointer.
5693 If we can't find enough available registers, issue
5694 a diagnostic and die - the user must have reserved
5695 way too many registers.
5696 But since all this is rather unlikely to happen and
5697 would require extra testing, we just die if r4 / r5
5698 are not available. */
5699 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5700 && !global_regs[4] && !global_regs[5]);
5702 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5703 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5704 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5705 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5706 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5707 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5708 emit_move_insn (mem, tmp_reg);
5709 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5710 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5711 emit_move_insn (mem, tmp_reg);
5712 emit_move_insn (reg, adj_reg);
5713 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5714 emit_move_insn (adj_reg, mem);
5715 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5716 emit_move_insn (tmp_reg, mem);
5717 /* Tell flow the insns that pop r4/r5 aren't dead. */
5718 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5719 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5722 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5724 /* If SIZE is negative, subtract the positive value.
5725 This sometimes allows a constant pool entry to be shared
5726 between prologue and epilogue code. */
5729 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5730 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5734 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5735 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
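	  /* Worked instance (added note): for a 256-byte frame the prologue
	     takes the size < 0 path, loading +256 into CONST_REG and
	     subtracting, while the epilogue loads +256 and adds; both can
	     then share a single constant pool entry for 256.  */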
5739 = (gen_rtx_EXPR_LIST
5740 (REG_FRAME_RELATED_EXPR,
5741 gen_rtx_SET (VOIDmode, reg,
5742 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5752 RTX_FRAME_RELATED_P (x) = 1;
/* Output RTL to push register RN onto the stack.  */

static rtx
push (int rn)
{
  rtx x;
  if (rn == FPUL_REG)
    x = gen_push_fpul ();
5764 else if (rn == FPSCR_REG)
5765 x = gen_push_fpscr ();
5766 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5767 && FP_OR_XD_REGISTER_P (rn))
      if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
	return NULL_RTX;
5771 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5773 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5774 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5776 x = gen_push (gen_rtx_REG (SImode, rn));
  x = frame_insn (x);
  REG_NOTES (x)
    = gen_rtx_EXPR_LIST (REG_INC,
			 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
  return x;
}
/* Output RTL to pop register RN from the stack.  */

static void
pop (int rn)
{
  rtx x;
  if (rn == FPUL_REG)
    x = gen_pop_fpul ();
5793 else if (rn == FPSCR_REG)
5794 x = gen_pop_fpscr ();
5795 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5796 && FP_OR_XD_REGISTER_P (rn))
      if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
	return;
5800 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5802 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5803 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5805 x = gen_pop (gen_rtx_REG (SImode, rn));
  x = emit_insn (x);
  REG_NOTES (x)
    = gen_rtx_EXPR_LIST (REG_INC,
			 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
}
5813 /* Generate code to push the regs specified in the mask. */
static void
push_regs (HARD_REG_SET *mask, int interrupt_handler)
{
5818 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5821 /* Push PR last; this gives better latencies after the prologue, and
5822 candidates for the return delay slot when there are no general
5823 registers pushed. */
5824 for (; i < FIRST_PSEUDO_REGISTER; i++)
5826 /* If this is an interrupt handler, and the SZ bit varies,
5827 and we have to push any floating point register, we need
5828 to switch to the correct precision first. */
5829 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5830 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5832 HARD_REG_SET unsaved;
5835 COMPL_HARD_REG_SET (unsaved, *mask);
5836 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
      if (i != PR_REG
	  && (i != FPSCR_REG || ! skip_fpscr)
	  && TEST_HARD_REG_BIT (*mask, i))
5843 /* If the ISR has RESBANK attribute assigned, don't push any of
5844 the following registers - R0-R14, MACH, MACL and GBR. */
5845 if (! (sh_cfun_resbank_handler_p ()
5846 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
5854 /* Push banked registers last to improve delay slot opportunities. */
5855 if (interrupt_handler)
5856 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5857 if (TEST_HARD_REG_BIT (*mask, i))
5860 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
5861 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
/* Calculate how much extra space is needed to save all callee-saved
   target registers.
   LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */

static int
shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
{
5873 int stack_space = 0;
5874 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5876 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5877 if ((! call_really_used_regs[reg] || interrupt_handler)
5878 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5879 /* Leave space to save this target register on the stack,
5880 in case target register allocation wants to use it. */
5881 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5885 /* Decide whether we should reserve space for callee-save target registers,
5886 in case target register allocation wants to use them. REGS_SAVED is
5887 the space, in bytes, that is already required for register saves.
5888 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
static int
shmedia_reserve_space_for_target_registers_p (int regs_saved,
					      HARD_REG_SET *live_regs_mask)
{
5896 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5899 /* Decide how much space to reserve for callee-save target registers
5900 in case target register allocation wants to use them.
5901 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
static int
shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
{
5906 if (shmedia_space_reserved_for_target_registers)
    return shmedia_target_regs_stack_space (live_regs_mask);
  else
    return 0;
}
5912 /* Work out the registers which need to be saved, both as a mask and a
5913 count of saved words. Return the count.
5915 If doing a pragma interrupt function, then push all regs used by the
5916 function, and if we call another function (we can tell by looking at PR),
5917 make sure that all the regs it clobbers are safe too. */
static int
calc_live_regs (HARD_REG_SET *live_regs_mask)
{
5925 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5926 bool nosave_low_regs;
5927 int pr_live, has_call;
5929 attrs = DECL_ATTRIBUTES (current_function_decl);
5930 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5931 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5932 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5933 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5935 CLEAR_HARD_REG_SET (*live_regs_mask);
5936 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5937 && df_regs_ever_live_p (FPSCR_REG))
5938 target_flags &= ~MASK_FPU_SINGLE;
  /* If switching to double mode can eliminate a lot of saves, do that.  */
5940 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5941 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5942 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5943 && (! call_really_used_regs[reg]
5944 || interrupt_handler)
5947 target_flags &= ~MASK_FPU_SINGLE;
5950 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5951 knows how to use it. That means the pseudo originally allocated for
5952 the initial value can become the PR_MEDIA_REG hard register, as seen for
5953 execute/20010122-1.c:test9. */
  /* ??? This function is called from initial_elimination_offset, hence we
     can't use the result of sh_media_register_for_return here.  */
5957 pr_live = sh_pr_n_sets ();
5960 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5961 pr_live = (pr_initial
5962 ? (GET_CODE (pr_initial) != REG
5963 || REGNO (pr_initial) != (PR_REG))
5964 : df_regs_ever_live_p (PR_REG));
5965 /* For Shcompact, if not optimizing, we end up with a memory reference
5966 using the return address pointer for __builtin_return_address even
5967 though there is no actual need to put the PR register on the stack. */
5968 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5970 /* Force PR to be live if the prologue has to call the SHmedia
5971 argument decoder or register saver. */
5972 if (TARGET_SHCOMPACT
5973 && ((crtl->args.info.call_cookie
5974 & ~ CALL_COOKIE_RET_TRAMP (1))
5975 || current_function_saves_all_registers))
5977 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5978 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5980 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5983 ? (/* Need to save all the regs ever live. */
5984 (df_regs_ever_live_p (reg)
5985 || (call_really_used_regs[reg]
5986 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5987 || reg == PIC_OFFSET_TABLE_REGNUM)
5989 || (TARGET_SHMEDIA && has_call
5990 && REGISTER_NATURAL_MODE (reg) == SImode
5991 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5992 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5993 && reg != RETURN_ADDRESS_POINTER_REGNUM
5994 && reg != T_REG && reg != GBR_REG
		   /* Push fpscr only on targets which have an FPU.  */
5996 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5997 : (/* Only push those regs which are used and need to be saved. */
6000 && crtl->args.info.call_cookie
6001 && reg == PIC_OFFSET_TABLE_REGNUM)
6002 || (df_regs_ever_live_p (reg)
6003 && (!call_really_used_regs[reg]
6004 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6005 || (current_function_calls_eh_return
6006 && (reg == EH_RETURN_DATA_REGNO (0)
6007 || reg == EH_RETURN_DATA_REGNO (1)
6008 || reg == EH_RETURN_DATA_REGNO (2)
6009 || reg == EH_RETURN_DATA_REGNO (3)))
6010 || ((reg == MACL_REG || reg == MACH_REG)
6011 && df_regs_ever_live_p (reg)
6012 && sh_cfun_attr_renesas_p ())
6015 SET_HARD_REG_BIT (*live_regs_mask, reg);
6016 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6018 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6019 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6021 if (FP_REGISTER_P (reg))
6023 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6025 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6026 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6029 else if (XD_REGISTER_P (reg))
6031 /* Must switch to double mode to access these registers. */
6032 target_flags &= ~MASK_FPU_SINGLE;
6036 if (nosave_low_regs && reg == R8_REG)
6039 /* If we have a target register optimization pass after prologue / epilogue
6040 threading, we need to assume all target registers will be live even if
6042 if (flag_branch_target_load_optimize2
6043 && TARGET_SAVE_ALL_TARGET_REGS
6044 && shmedia_space_reserved_for_target_registers)
6045 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6046 if ((! call_really_used_regs[reg] || interrupt_handler)
6047 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6049 SET_HARD_REG_BIT (*live_regs_mask, reg);
6050 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6052 /* If this is an interrupt handler, we don't have any call-clobbered
6053 registers we can conveniently use for target register save/restore.
6054 Make sure we save at least one general purpose register when we need
6055 to save target registers. */
6056 if (interrupt_handler
6057 && hard_reg_set_intersect_p (*live_regs_mask,
6058 reg_class_contents[TARGET_REGS])
6059 && ! hard_reg_set_intersect_p (*live_regs_mask,
6060 reg_class_contents[GENERAL_REGS]))
6062 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6063 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6069 /* Code to generate prologue and epilogue sequences */
6071 /* PUSHED is the number of bytes that are being pushed on the
6072 stack for register saves. Return the frame size, padded
6073 appropriately so that the stack stays properly aligned. */
6074 static HOST_WIDE_INT
6075 rounded_frame_size (int pushed)
6077 HOST_WIDE_INT size = get_frame_size ();
6078 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6080 return ((size + pushed + align - 1) & -align) - pushed;
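/* Worked instance (added note; illustrative numbers): with
   get_frame_size () == 20, PUSHED == 12 and an 8-byte STACK_BOUNDARY,
   ((20 + 12 + 8 - 1) & -8) - 12 == 20, so the pushed bytes plus the
   returned frame size total 32, a multiple of the alignment.  */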
6083 /* Choose a call-clobbered target-branch register that remains
6084 unchanged along the whole function. We set it up as the return
6085 value in the prologue. */
int
sh_media_register_for_return (void)
{
  int regno;
  int tr0_used;

  if (! current_function_is_leaf)
    return -1;
  if (lookup_attribute ("interrupt_handler",
			DECL_ATTRIBUTES (current_function_decl)))
    return -1;
  if (sh_cfun_interrupt_handler_p ())
    return -1;
6100 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6102 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
    if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
      return regno;

  return -1;
}
6109 /* The maximum registers we need to save are:
6110 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6111 - 32 floating point registers (for each pair, we save none,
6112 one single precision value, or a double precision value).
6113 - 8 target registers
6114 - add 1 entry for a delimiter. */
6115 #define MAX_SAVED_REGS (62+32+8)
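/* Added note: 62+32+8 == 102 register slots; the entries[] array below
   reserves MAX_SAVED_REGS + 2 so that a VOIDmode delimiter fits at both
   the start and the end of a schedule.  */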
typedef struct save_entry_s
{
  int reg;
  enum machine_mode mode;
  int offset;
} save_entry;
6126 /* There will be a delimiter entry with VOIDmode both at the start and the
6127 end of a filled in schedule. The end delimiter has the offset of the
6128 save with the smallest (i.e. most negative) offset. */
typedef struct save_schedule_s
{
  save_entry entries[MAX_SAVED_REGS + 2];
  int temps[MAX_TEMPS+1];
} save_schedule;
/* Fill in SCHEDULE according to LIVE_REGS_MASK.  If RESTORE is nonzero,
   use reverse order.  Returns the last entry written to (not counting
   the delimiter).  OFFSET_BASE is a number to be added to all offset
   computations.  */

static save_entry *
sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
		    int offset_base)
{
6145 save_entry *entry = schedule->entries;
6149 if (! current_function_interrupt)
6150 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6151 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6152 && ! FUNCTION_ARG_REGNO_P (i)
6153 && i != FIRST_RET_REG
6154 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6155 && ! (current_function_calls_eh_return
6156 && (i == EH_RETURN_STACKADJ_REGNO
6157 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6158 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6159 schedule->temps[tmpx++] = i;
6161 entry->mode = VOIDmode;
6162 entry->offset = offset_base;
  /* We loop twice: first, we save 8-byte aligned registers in the
     higher addresses, that are known to be aligned.  Then, we
     proceed to saving 32-bit registers that don't need 8-byte
     alignment.
     If this is an interrupt function, all registers that need saving
     need to be saved in full.  Moreover, we need to postpone saving
     target registers till we have saved some general purpose registers
     we can then use as scratch registers.  */
6172 offset = offset_base;
6173 for (align = 1; align >= 0; align--)
6175 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6176 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6178 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6181 if (current_function_interrupt)
6183 if (TARGET_REGISTER_P (i))
6185 if (GENERAL_REGISTER_P (i))
6188 if (mode == SFmode && (i % 2) == 1
6189 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6190 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
	  /* If we're doing the aligned pass and this is not aligned,
	     or we're doing the unaligned pass and this is aligned,
	     skip it.  */
	  if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
	      != align)
	    continue;
6204 if (current_function_interrupt
6205 && GENERAL_REGISTER_P (i)
6206 && tmpx < MAX_TEMPS)
6207 schedule->temps[tmpx++] = i;
6209 offset -= GET_MODE_SIZE (mode);
6212 entry->offset = offset;
6215 if (align && current_function_interrupt)
6216 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6217 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6219 offset -= GET_MODE_SIZE (DImode);
6221 entry->mode = DImode;
6222 entry->offset = offset;
6227 entry->mode = VOIDmode;
6228 entry->offset = offset;
6229 schedule->temps[tmpx] = -1;
void
sh_expand_prologue (void)
{
6236 HARD_REG_SET live_regs_mask;
6239 int save_flags = target_flags;
  tree sp_switch_attr
    = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6244 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6246 /* We have pretend args if we had an object sent partially in registers
6247 and partially on the stack, e.g. a large structure. */
6248 pretend_args = crtl->args.pretend_args_size;
6249 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6250 && (NPARM_REGS(SImode)
6251 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6253 output_stack_adjust (-pretend_args
6254 - crtl->args.info.stack_regs * 8,
6255 stack_pointer_rtx, 0, NULL);
6257 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6258 /* We're going to use the PIC register to load the address of the
6259 incoming-argument decoder and/or of the return trampoline from
6260 the GOT, so make sure the PIC register is preserved and
6262 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6264 if (TARGET_SHCOMPACT
6265 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6269 /* First, make all registers with incoming arguments that will
6270 be pushed onto the stack live, so that register renaming
6271 doesn't overwrite them. */
6272 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6273 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6274 >= NPARM_REGS (SImode) - reg)
6275 for (; reg < NPARM_REGS (SImode); reg++)
6276 emit_insn (gen_shcompact_preserve_incoming_args
6277 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6278 else if (CALL_COOKIE_INT_REG_GET
6279 (crtl->args.info.call_cookie, reg) == 1)
6280 emit_insn (gen_shcompact_preserve_incoming_args
6281 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6283 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6285 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6286 GEN_INT (crtl->args.info.call_cookie));
6287 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6288 gen_rtx_REG (SImode, R0_REG));
6290 else if (TARGET_SHMEDIA)
6292 int tr = sh_media_register_for_return ();
6295 emit_move_insn (gen_rtx_REG (DImode, tr),
6296 gen_rtx_REG (DImode, PR_MEDIA_REG));
6299 /* Emit the code for SETUP_VARARGS. */
6300 if (current_function_stdarg)
6302 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
	  /* Push arg regs as if they'd been provided by the caller on
	     the stack.  */
6305 for (i = 0; i < NPARM_REGS(SImode); i++)
6307 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6310 if (i >= (NPARM_REGS(SImode)
6311 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6315 RTX_FRAME_RELATED_P (insn) = 0;
6320 /* If we're supposed to switch stacks at function entry, do so now. */
      /* The argument specifies a variable holding the address of the
	 stack the interrupt function should switch to/from at entry/exit.  */
      const char *s
	= ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6327 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6329 emit_insn (gen_sp_switch_1 (sp_switch));
6332 d = calc_live_regs (&live_regs_mask);
6333 /* ??? Maybe we could save some switching if we can move a mode switch
6334 that already happens to be at the function start into the prologue. */
6335 if (target_flags != save_flags && ! current_function_interrupt)
6336 emit_insn (gen_toggle_sz ());
6340 int offset_base, offset;
6342 int offset_in_r0 = -1;
6344 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6345 int total_size, save_size;
6346 save_schedule schedule;
6350 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6351 && ! current_function_interrupt)
6352 r0 = gen_rtx_REG (Pmode, R0_REG);
      /* D is the actual number of bytes that we need for saving registers;
	 however, in initial_elimination_offset we have committed to using
	 an additional TREGS_SPACE amount of bytes - in order to keep both
	 addresses to arguments supplied by the caller and local variables
	 valid, we must keep this gap.  Place it between the incoming
	 arguments and the actually saved registers in a bid to optimize
	 locality of reference.  */
6361 total_size = d + tregs_space;
6362 total_size += rounded_frame_size (total_size);
6363 save_size = total_size - rounded_frame_size (d);
6364 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6365 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6366 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
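      /* Worked instance (added note; illustrative numbers): with save_size
	 == 20 and an 8-byte stack boundary, 20 % 8 == 4, so d_rounding
	 becomes 8 - 4 == 4 and the save area grows to 24 bytes, a multiple
	 of the boundary.  */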
6368 /* If adjusting the stack in a single step costs nothing extra, do so.
6369 I.e. either if a single addi is enough, or we need a movi anyway,
6370 and we don't exceed the maximum offset range (the test for the
6371 latter is conservative for simplicity). */
6373 && (CONST_OK_FOR_I10 (-total_size)
6374 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6375 && total_size <= 2044)))
6376 d_rounding = total_size - save_size;
6378 offset_base = d + d_rounding;
6380 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6383 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6384 tmp_pnt = schedule.temps;
6385 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6387 enum machine_mode mode = entry->mode;
6388 unsigned int reg = entry->reg;
6389 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6392 offset = entry->offset;
6394 reg_rtx = gen_rtx_REG (mode, reg);
6396 mem_rtx = gen_frame_mem (mode,
6397 gen_rtx_PLUS (Pmode,
6401 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6408 if (HAVE_PRE_DECREMENT
6409 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6410 || mem_rtx == NULL_RTX
6411 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6413 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6415 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6424 offset += GET_MODE_SIZE (mode);
6428 if (mem_rtx != NULL_RTX)
6431 if (offset_in_r0 == -1)
6433 emit_move_insn (r0, GEN_INT (offset));
6434 offset_in_r0 = offset;
6436 else if (offset != offset_in_r0)
6441 GEN_INT (offset - offset_in_r0)));
6442 offset_in_r0 += offset - offset_in_r0;
6445 if (pre_dec != NULL_RTX)
6451 (Pmode, r0, stack_pointer_rtx));
6455 offset -= GET_MODE_SIZE (mode);
6456 offset_in_r0 -= GET_MODE_SIZE (mode);
6461 mem_rtx = gen_frame_mem (mode, r0);
6463 mem_rtx = gen_frame_mem (mode,
6464 gen_rtx_PLUS (Pmode,
	  /* We must not use an r0-based address for target-branch
	     registers or for special registers without pre-dec
	     memory addresses, since we store their values in r0
	     first.  */
6472 gcc_assert (!TARGET_REGISTER_P (reg)
6473 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6474 || mem_rtx == pre_dec));
6477 orig_reg_rtx = reg_rtx;
6478 if (TARGET_REGISTER_P (reg)
6479 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6480 && mem_rtx != pre_dec))
6482 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6484 emit_move_insn (tmp_reg, reg_rtx);
6486 if (REGNO (tmp_reg) == R0_REG)
6490 gcc_assert (!refers_to_regno_p
6491 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6494 if (*++tmp_pnt <= 0)
6495 tmp_pnt = schedule.temps;
6502 /* Mark as interesting for dwarf cfi generator */
6503 insn = emit_move_insn (mem_rtx, reg_rtx);
6504 RTX_FRAME_RELATED_P (insn) = 1;
6505 /* If we use an intermediate register for the save, we can't
6506 describe this exactly in cfi as a copy of the to-be-saved
6507 register into the temporary register and then the temporary
6508 register on the stack, because the temporary register can
6509 have a different natural size than the to-be-saved register.
6510 Thus, we gloss over the intermediate copy and pretend we do
6511 a direct save from the to-be-saved register. */
6512 if (REGNO (reg_rtx) != reg)
6516 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6517 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6519 REG_NOTES (insn) = note_rtx;
6522 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6524 rtx reg_rtx = gen_rtx_REG (mode, reg);
6526 rtx mem_rtx = gen_frame_mem (mode,
6527 gen_rtx_PLUS (Pmode,
6531 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6532 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6534 REG_NOTES (insn) = note_rtx;
6539 gcc_assert (entry->offset == d_rounding);
6542 push_regs (&live_regs_mask, current_function_interrupt);
6544 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6545 emit_insn (gen_GOTaddr2picreg ());
6547 if (SHMEDIA_REGS_STACK_ADJUST ())
6549 /* This must NOT go through the PLT, otherwise mach and macl
6550 may be clobbered. */
6551 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6553 ? "__GCC_push_shmedia_regs"
6554 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6555 emit_insn (gen_shmedia_save_restore_regs_compact
6556 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6559 if (target_flags != save_flags && ! current_function_interrupt)
6560 emit_insn (gen_toggle_sz ());
6562 target_flags = save_flags;
6564 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6565 stack_pointer_rtx, 0, NULL);
6567 if (frame_pointer_needed)
6568 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6570 if (TARGET_SHCOMPACT
6571 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6573 /* This must NOT go through the PLT, otherwise mach and macl
6574 may be clobbered. */
6575 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6576 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6577 emit_insn (gen_shcompact_incoming_args ());
6582 sh_expand_epilogue (bool sibcall_p)
6584 HARD_REG_SET live_regs_mask;
6588 int save_flags = target_flags;
6589 int frame_size, save_size;
6590 int fpscr_deferred = 0;
6591 int e = sibcall_p ? -1 : 1;
6593 d = calc_live_regs (&live_regs_mask);
6596 frame_size = rounded_frame_size (d);
6600 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6602 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6603 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6604 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6606 total_size = d + tregs_space;
6607 total_size += rounded_frame_size (total_size);
6608 save_size = total_size - frame_size;
6610 /* If adjusting the stack in a single step costs nothing extra, do so.
6611 I.e. either if a single addi is enough, or we need a movi anyway,
6612 and we don't exceed the maximum offset range (the test for the
6613 latter is conservative for simplicity). */
6615 && ! frame_pointer_needed
6616 && (CONST_OK_FOR_I10 (total_size)
6617 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6618 && total_size <= 2044)))
6619 d_rounding = frame_size;
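/* (CONST_OK_FOR_I10 is SHmedia's signed 10-bit addi immediate range,
   -512..511; the 2044 bound is the conservative offset-range check
   that the comment above mentions.)  */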
6621 frame_size -= d_rounding;
6624 if (frame_pointer_needed)
6626 /* We must avoid scheduling the epilogue with previous basic blocks
6627 when exception handling is enabled. See PR/18032. */
6628 if (flag_exceptions)
6629 emit_insn (gen_blockage ());
6630 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6633 /* We must avoid moving the stack pointer adjustment past code
6634 which reads from the local frame, else an interrupt could
6635 occur after the SP adjustment and clobber data in the local frame. */
6637 emit_insn (gen_blockage ());
6638 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6640 else if (frame_size)
6642 /* We must avoid moving the stack pointer adjustment past code
6643 which reads from the local frame, else an interrupt could
6644 occur after the SP adjustment and clobber data in the local frame. */
6646 emit_insn (gen_blockage ());
6647 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6650 if (SHMEDIA_REGS_STACK_ADJUST ())
6652 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6654 ? "__GCC_pop_shmedia_regs"
6655 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6656 /* This must NOT go through the PLT, otherwise mach and macl
6657 may be clobbered. */
6658 emit_insn (gen_shmedia_save_restore_regs_compact
6659 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6662 /* Pop all the registers. */
6664 if (target_flags != save_flags && ! current_function_interrupt)
6665 emit_insn (gen_toggle_sz ());
6668 int offset_base, offset;
6669 int offset_in_r0 = -1;
6671 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6672 save_schedule schedule;
6676 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6677 offset_base = -entry[1].offset + d_rounding;
6678 tmp_pnt = schedule.temps;
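/* Walk the save schedule in reverse, restoring registers; the
   addressing mirrors the prologue (stack-pointer relative,
   post-incremented r0, or r0 plus the stack pointer), with
   offset_in_r0 again tracking what is currently in r0.  */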
6679 for (; entry->mode != VOIDmode; entry--)
6681 enum machine_mode mode = entry->mode;
6682 int reg = entry->reg;
6683 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6685 offset = offset_base + entry->offset;
6686 reg_rtx = gen_rtx_REG (mode, reg);
6688 mem_rtx = gen_frame_mem (mode,
6689 gen_rtx_PLUS (Pmode,
6693 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6699 if (HAVE_POST_INCREMENT
6700 && (offset == offset_in_r0
6701 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6702 && mem_rtx == NULL_RTX)
6703 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6705 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6707 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6710 post_inc = NULL_RTX;
6719 if (mem_rtx != NULL_RTX)
6722 if (offset_in_r0 == -1)
6724 emit_move_insn (r0, GEN_INT (offset));
6725 offset_in_r0 = offset;
6727 else if (offset != offset_in_r0)
6732 GEN_INT (offset - offset_in_r0)));
6733 offset_in_r0 += offset - offset_in_r0;
6736 if (post_inc != NULL_RTX)
6742 (Pmode, r0, stack_pointer_rtx));
6748 offset_in_r0 += GET_MODE_SIZE (mode);
6751 mem_rtx = gen_frame_mem (mode, r0);
6753 mem_rtx = gen_frame_mem (mode,
6754 gen_rtx_PLUS (Pmode,
6758 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6759 || mem_rtx == post_inc);
6762 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6763 && mem_rtx != post_inc)
6765 insn = emit_move_insn (r0, mem_rtx);
6768 else if (TARGET_REGISTER_P (reg))
6770 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6772 /* Give the scheduler a bit of freedom by using up to
6773 MAX_TEMPS registers in a round-robin fashion. */
6774 insn = emit_move_insn (tmp_reg, mem_rtx);
6777 tmp_pnt = schedule.temps;
6780 insn = emit_move_insn (reg_rtx, mem_rtx);
6783 gcc_assert (entry->offset + offset_base == d + d_rounding);
6785 else /* ! TARGET_SH5 */
6790 /* For an ISR with RESBANK attribute assigned, don't pop the PR register. */
6792 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
6793 && !sh_cfun_resbank_handler_p ())
6795 if (!frame_pointer_needed)
6796 emit_insn (gen_blockage ());
6800 /* Banked registers are popped first to avoid being scheduled in the
6801 delay slot. RTE switches banks before its delay slot instruction. */
6802 if (current_function_interrupt)
6804 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6805 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6806 pop (LAST_BANKED_REG - i);
6808 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6811 last_reg = FIRST_PSEUDO_REGISTER;
6813 for (i = 0; i < last_reg; i++)
6815 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6817 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6818 && hard_reg_set_intersect_p (live_regs_mask,
6819 reg_class_contents[DF_REGS]))
6821 /* For an ISR with RESBANK attribute assigned, don't pop the
6822 following registers: R0-R14, MACH, MACL and GBR. */
6823 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
6824 && ! (sh_cfun_resbank_handler_p ()
6825 && ((j >= FIRST_GENERAL_REG
6826 && j < LAST_GENERAL_REG)
6832 if (j == FIRST_FP_REG && fpscr_deferred)
6836 if (target_flags != save_flags && ! current_function_interrupt)
6837 emit_insn (gen_toggle_sz ());
6838 target_flags = save_flags;
6840 output_stack_adjust (crtl->args.pretend_args_size
6841 + save_size + d_rounding
6842 + crtl->args.info.stack_regs * 8,
6843 stack_pointer_rtx, e, NULL);
6845 if (current_function_calls_eh_return)
6846 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6847 EH_RETURN_STACKADJ_RTX));
6849 /* Switch back to the normal stack if necessary. */
6850 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6851 emit_insn (gen_sp_switch_2 ());
6853 /* Tell flow the insn that pops PR isn't dead. */
6854 /* PR_REG will never be live in SHmedia mode, and we don't need to
6855 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6856 by the return pattern. */
6857 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6858 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
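/* Tri-state cache: 0 = not computed yet, 1 = epilogue needed,
   -1 = the epilogue would be empty.  sh_need_epilogue computes it by
   expanding the epilogue into a discarded insn sequence and testing
   whether anything came out.  */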
6861 static int sh_need_epilogue_known = 0;
6864 sh_need_epilogue (void)
6866 if (! sh_need_epilogue_known)
6871 sh_expand_epilogue (0);
6872 epilogue = get_insns ();
6874 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6876 return sh_need_epilogue_known > 0;
6879 /* Emit code to change the current function's return address to RA.
6880 TMP is available as a scratch register, if needed. */
6883 sh_set_return_address (rtx ra, rtx tmp)
6885 HARD_REG_SET live_regs_mask;
6887 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6890 d = calc_live_regs (&live_regs_mask);
6892 /* If pr_reg isn't live, we can set it (or the register given in
6893 sh_media_register_for_return) directly. */
6894 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6900 int rr_regno = sh_media_register_for_return ();
6905 rr = gen_rtx_REG (DImode, rr_regno);
6908 rr = gen_rtx_REG (SImode, pr_reg);
6910 emit_insn (GEN_MOV (rr, ra));
6911 /* Tell flow the register for return isn't dead. */
6912 emit_insn (gen_rtx_USE (VOIDmode, rr));
6919 save_schedule schedule;
6922 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6923 offset = entry[1].offset;
6924 for (; entry->mode != VOIDmode; entry--)
6925 if (entry->reg == pr_reg)
6928 /* We couldn't find the PR register. */
6932 offset = entry->offset - offset;
6933 pr_offset = (rounded_frame_size (d) + offset
6934 + SHMEDIA_REGS_STACK_ADJUST ());
6937 pr_offset = rounded_frame_size (d);
6939 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6940 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6942 tmp = gen_frame_mem (Pmode, tmp);
6943 emit_insn (GEN_MOV (tmp, ra));
6946 /* Clear variables at function end. */
6949 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6950 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6952 sh_need_epilogue_known = 0;
6956 sh_builtin_saveregs (void)
6958 /* First unnamed integer register. */
6959 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
6960 /* Number of integer registers we need to save. */
6961 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6962 /* First unnamed SFmode float reg. */
6963 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
6964 /* Number of SFmode float regs to save. */
6965 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6968 alias_set_type alias_set;
6974 int pushregs = n_intregs;
6976 while (pushregs < NPARM_REGS (SImode) - 1
6977 && (CALL_COOKIE_INT_REG_GET
6978 (crtl->args.info.call_cookie,
6979 NPARM_REGS (SImode) - pushregs)
6982 crtl->args.info.call_cookie
6983 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6988 if (pushregs == NPARM_REGS (SImode))
6989 crtl->args.info.call_cookie
6990 |= (CALL_COOKIE_INT_REG (0, 1)
6991 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6993 crtl->args.info.call_cookie
6994 |= CALL_COOKIE_STACKSEQ (pushregs);
6996 crtl->args.pretend_args_size += 8 * n_intregs;
6998 if (TARGET_SHCOMPACT)
7002 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7004 error ("__builtin_saveregs not supported by this subtarget");
7011 /* Allocate block of memory for the regs. */
7012 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7013 Or can assign_stack_local accept a 0 SIZE argument? */
7014 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7017 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7018 else if (n_floatregs & 1)
7022 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7023 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7024 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7025 regbuf = change_address (regbuf, BLKmode, addr);
7027 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7031 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7032 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7033 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7034 emit_insn (gen_andsi3 (addr, addr, mask));
7035 regbuf = change_address (regbuf, BLKmode, addr);
7038 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7039 alias_set = get_varargs_alias_set ();
7040 set_mem_alias_set (regbuf, alias_set);
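/* The buffer places the SFmode save area at the start and the SImode
   save area right after it (note the n_floatregs offset in the
   move_block_from_reg call below); sh_va_start relies on this
   ordering.  */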
7043 /* Save int args. This is optimized to only save the regs that are necessary. Explicitly
7044 named args need not be saved. */
7046 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7047 adjust_address (regbuf, BLKmode,
7048 n_floatregs * UNITS_PER_WORD),
7052 /* Return the address of the regbuf. */
7053 return XEXP (regbuf, 0);
7056 /* Save float args. This is optimized to only save the regs that are necessary. Explicitly
7057 named args need not be saved.
7058 We explicitly build a pointer to the buffer because it halves the insn
7059 count when not optimizing (otherwise the pointer is built for each reg saved).
7061 We emit the moves in reverse order so that we can use predecrement. */
7063 fpregs = copy_to_mode_reg (Pmode,
7064 plus_constant (XEXP (regbuf, 0),
7065 n_floatregs * UNITS_PER_WORD));
7066 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7069 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7071 emit_insn (gen_addsi3 (fpregs, fpregs,
7072 GEN_INT (-2 * UNITS_PER_WORD)));
7073 mem = change_address (regbuf, DFmode, fpregs);
7074 emit_move_insn (mem,
7075 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7077 regno = first_floatreg;
7080 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7081 mem = change_address (regbuf, SFmode, fpregs);
7082 emit_move_insn (mem,
7083 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7084 - (TARGET_LITTLE_ENDIAN != 0)));
7088 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7092 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7093 mem = change_address (regbuf, SFmode, fpregs);
7094 emit_move_insn (mem,
7095 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7098 /* Return the address of the regbuf. */
7099 return XEXP (regbuf, 0);
7102 /* Define the `__builtin_va_list' type for the ABI. */
7105 sh_build_builtin_va_list (void)
7107 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7110 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7111 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7112 return ptr_type_node;
7114 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7116 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7118 f_next_o_limit = build_decl (FIELD_DECL,
7119 get_identifier ("__va_next_o_limit"),
7121 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7123 f_next_fp_limit = build_decl (FIELD_DECL,
7124 get_identifier ("__va_next_fp_limit"),
7126 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7129 DECL_FIELD_CONTEXT (f_next_o) = record;
7130 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7131 DECL_FIELD_CONTEXT (f_next_fp) = record;
7132 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7133 DECL_FIELD_CONTEXT (f_next_stack) = record;
7135 TYPE_FIELDS (record) = f_next_o;
7136 TREE_CHAIN (f_next_o) = f_next_o_limit;
7137 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7138 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7139 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7141 layout_type (record);
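/* Roughly, the record built above is equivalent to:

     struct {
       void *__va_next_o;         -- next integer arg in the reg-save area
       void *__va_next_o_limit;   -- end of the integer reg-save area
       void *__va_next_fp;        -- next FP arg in the reg-save area
       void *__va_next_fp_limit;  -- end of the FP reg-save area
       void *__va_next_stack;     -- next stack-passed arg
     };

   This is a sketch; the field types, elided above, are assumed to be
   pointer types.  */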
7146 /* Implement `va_start' for varargs and stdarg. */
7149 sh_va_start (tree valist, rtx nextarg)
7151 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7152 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7158 expand_builtin_saveregs ();
7159 std_expand_builtin_va_start (valist, nextarg);
7163 if ((! TARGET_SH2E && ! TARGET_SH4)
7164 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7166 std_expand_builtin_va_start (valist, nextarg);
7170 f_next_o = TYPE_FIELDS (va_list_type_node);
7171 f_next_o_limit = TREE_CHAIN (f_next_o);
7172 f_next_fp = TREE_CHAIN (f_next_o_limit);
7173 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7174 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7176 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7178 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7179 valist, f_next_o_limit, NULL_TREE);
7180 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7182 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7183 valist, f_next_fp_limit, NULL_TREE);
7184 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7185 valist, f_next_stack, NULL_TREE);
7187 /* Call __builtin_saveregs. */
7188 u = make_tree (sizetype, expand_builtin_saveregs ());
7189 u = fold_convert (ptr_type_node, u);
7190 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7191 TREE_SIDE_EFFECTS (t) = 1;
7192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7194 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7199 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7200 size_int (UNITS_PER_WORD * nfp));
7201 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7202 TREE_SIDE_EFFECTS (t) = 1;
7203 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7205 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7206 TREE_SIDE_EFFECTS (t) = 1;
7207 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7209 nint = crtl->args.info.arg_count[SH_ARG_INT];
7214 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7215 size_int (UNITS_PER_WORD * nint));
7216 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7217 TREE_SIDE_EFFECTS (t) = 1;
7218 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7220 u = make_tree (ptr_type_node, nextarg);
7221 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7222 TREE_SIDE_EFFECTS (t) = 1;
7223 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7226 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7227 member, return it; otherwise return NULL_TREE. */
7229 find_sole_member (tree type)
7231 tree field, member = NULL_TREE;
7233 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7235 if (TREE_CODE (field) != FIELD_DECL)
7237 if (!DECL_SIZE (field))
7239 if (integer_zerop (DECL_SIZE (field)))
7247 /* Implement `va_arg'. */
7250 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7251 tree *post_p ATTRIBUTE_UNUSED)
7253 HOST_WIDE_INT size, rsize;
7254 tree tmp, pptr_type_node;
7255 tree addr, lab_over = NULL, result = NULL;
7256 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7260 type = build_pointer_type (type);
7262 size = int_size_in_bytes (type);
7263 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7264 pptr_type_node = build_pointer_type (ptr_type_node);
7266 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7267 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7269 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7270 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7275 f_next_o = TYPE_FIELDS (va_list_type_node);
7276 f_next_o_limit = TREE_CHAIN (f_next_o);
7277 f_next_fp = TREE_CHAIN (f_next_o_limit);
7278 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7279 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7281 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7283 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7284 valist, f_next_o_limit, NULL_TREE);
7285 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7286 valist, f_next_fp, NULL_TREE);
7287 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7288 valist, f_next_fp_limit, NULL_TREE);
7289 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7290 valist, f_next_stack, NULL_TREE);
7292 /* Structures with a single member with a distinct mode are passed
7293 like their member. This is relevant if the latter has a REAL_TYPE
7294 or COMPLEX_TYPE type. */
7296 while (TREE_CODE (eff_type) == RECORD_TYPE
7297 && (member = find_sole_member (eff_type))
7298 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7299 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7300 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7302 tree field_type = TREE_TYPE (member);
7304 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7305 eff_type = field_type;
7308 gcc_assert ((TYPE_ALIGN (eff_type)
7309 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7310 || (TYPE_ALIGN (eff_type)
7311 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
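/* E.g. struct { float f; } is passed like a plain float, and the loop
   above also looks through nested single-member structs such as
   struct { struct { float f; } s; }.  */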
7316 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7318 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7319 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7320 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7325 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7328 addr = create_tmp_var (pptr_type_node, NULL);
7329 lab_false = create_artificial_label ();
7330 lab_over = create_artificial_label ();
7332 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7336 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7338 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7340 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7341 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7342 gimplify_and_add (tmp, pre_p);
7344 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7345 gimplify_and_add (tmp, pre_p);
7346 tmp = next_fp_limit;
7347 if (size > 4 && !is_double)
7348 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7349 size_int (4 - size));
7350 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7351 cmp = build3 (COND_EXPR, void_type_node, tmp,
7352 build1 (GOTO_EXPR, void_type_node, lab_false),
7355 gimplify_and_add (cmp, pre_p);
7357 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7358 || (is_double || size == 16))
7360 tmp = fold_convert (sizetype, next_fp_tmp);
7361 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7362 size_int (UNITS_PER_WORD));
7363 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7365 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7367 gimplify_and_add (tmp, pre_p);
7370 gimplify_and_add (cmp, pre_p);
7372 #ifdef FUNCTION_ARG_SCmode_WART
7373 if (TYPE_MODE (eff_type) == SCmode
7374 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7376 tree subtype = TREE_TYPE (eff_type);
7380 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7381 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7384 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7385 real = get_initialized_tmp_var (real, pre_p, NULL);
7387 result = build2 (COMPLEX_EXPR, type, real, imag);
7388 result = get_initialized_tmp_var (result, pre_p, NULL);
7390 #endif /* FUNCTION_ARG_SCmode_WART */
7392 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7393 gimplify_and_add (tmp, pre_p);
7395 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7396 gimplify_and_add (tmp, pre_p);
7398 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7399 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7400 gimplify_and_add (tmp, pre_p);
7401 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7402 gimplify_and_add (tmp, pre_p);
7404 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7405 gimplify_and_add (tmp, post_p);
7406 valist = next_fp_tmp;
7410 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7412 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7413 tmp = build3 (COND_EXPR, void_type_node, tmp,
7414 build1 (GOTO_EXPR, void_type_node, lab_false),
7416 gimplify_and_add (tmp, pre_p);
7418 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7419 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7420 gimplify_and_add (tmp, pre_p);
7422 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7423 gimplify_and_add (tmp, pre_p);
7425 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7426 gimplify_and_add (tmp, pre_p);
7428 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7430 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7431 next_o, next_o_limit);
7432 gimplify_and_add (tmp, pre_p);
7435 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7436 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7437 gimplify_and_add (tmp, pre_p);
7442 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7443 gimplify_and_add (tmp, pre_p);
7447 /* ??? In va-sh.h, there had been code to make values larger than
7448 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7450 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7453 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7454 gimplify_and_add (tmp, pre_p);
7456 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7457 gimplify_and_add (tmp, pre_p);
7463 result = build_va_arg_indirect_ref (result);
7469 sh_promote_prototypes (const_tree type)
7475 return ! sh_attr_renesas_p (type);
7478 /* Whether an argument must be passed by reference. On SHcompact, we
7479 pretend arguments wider than 32 bits that would have been passed in
7480 registers are passed by reference, so that an SHmedia trampoline
7481 loads them into the full 64-bit registers. */
7484 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7485 const_tree type, bool named)
7487 unsigned HOST_WIDE_INT size;
7490 size = int_size_in_bytes (type);
7492 size = GET_MODE_SIZE (mode);
7494 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7496 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7497 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7498 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7500 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7501 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7508 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7509 const_tree type, bool named)
7511 if (targetm.calls.must_pass_in_stack (mode, type))
7514 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7515 wants to know about pass-by-reference semantics for incoming arguments. */
7520 if (TARGET_SHCOMPACT)
7522 cum->byref = shcompact_byref (cum, mode, type, named);
7523 return cum->byref != 0;
7530 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7531 const_tree type, bool named ATTRIBUTE_UNUSED)
7533 /* ??? How can it possibly be correct to return true only on the
7534 caller side of the equation? Is there someplace else in the
7535 sh backend that's magically producing the copies? */
7536 return (cum->outgoing
7537 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7538 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7542 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7543 tree type, bool named ATTRIBUTE_UNUSED)
7548 && PASS_IN_REG_P (*cum, mode, type)
7549 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7550 && (ROUND_REG (*cum, mode)
7552 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7553 : ROUND_ADVANCE (int_size_in_bytes (type)))
7554 > NPARM_REGS (mode)))
7555 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7557 else if (!TARGET_SHCOMPACT
7558 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7559 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7561 return words * UNITS_PER_WORD;
7565 /* Define where to put the arguments to a function.
7566 Value is zero to push the argument on the stack,
7567 or a hard register in which to store the argument.
7569 MODE is the argument's machine mode.
7570 TYPE is the data type of the argument (as a tree).
7571 This is null for libcalls where that information may not be available.
7573 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7574 the preceding args and about the function being called.
7575 NAMED is nonzero if this argument is a named parameter
7576 (otherwise it is an extra parameter matching an ellipsis).
7578 On SH the first args are normally in registers
7579 and the rest are pushed. Any arg that starts within the first
7580 NPARM_REGS words is at least partially passed in a register unless
7581 its data type forbids. */
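/* For example, on SH4 the first four integer arguments are passed in
   r4..r7 and the first SFmode arguments in fr4..fr11; anything beyond
   that goes on the stack.  (Illustrative summary; the exact limits
   come from NPARM_REGS and BASE_ARG_REG.)  */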
7585 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7586 tree type, int named)
7588 if (! TARGET_SH5 && mode == VOIDmode)
7589 return GEN_INT (ca->renesas_abi ? 1 : 0);
7592 && PASS_IN_REG_P (*ca, mode, type)
7593 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7597 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7598 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7600 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7601 gen_rtx_REG (SFmode,
7603 + (ROUND_REG (*ca, mode) ^ 1)),
7605 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7606 gen_rtx_REG (SFmode,
7608 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7610 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7613 /* If the alignment of a DF value causes an SF register to be
7614 skipped, we will use that skipped register for the next SF
7616 if ((TARGET_HITACHI || ca->renesas_abi)
7617 && ca->free_single_fp_reg
7619 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7621 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7622 ^ (mode == SFmode && TARGET_SH4
7623 && TARGET_LITTLE_ENDIAN != 0
7624 && ! TARGET_HITACHI && ! ca->renesas_abi);
7625 return gen_rtx_REG (mode, regno);
7631 if (mode == VOIDmode && TARGET_SHCOMPACT)
7632 return GEN_INT (ca->call_cookie);
7634 /* The following test assumes unnamed arguments are promoted to DFmode. */
7636 if (mode == SFmode && ca->free_single_fp_reg)
7637 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7639 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7640 && (named || ! ca->prototype_p)
7641 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7643 if (! ca->prototype_p && TARGET_SHMEDIA)
7644 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7646 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7648 + ca->arg_count[(int) SH_ARG_FLOAT]);
7651 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7652 && (! TARGET_SHCOMPACT
7653 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7654 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7657 return gen_rtx_REG (mode, (FIRST_PARM_REG
7658 + ca->arg_count[(int) SH_ARG_INT]));
7667 /* Update the data in CUM to advance over an argument
7668 of mode MODE and data type TYPE.
7669 (TYPE is null for libcalls where that information may not be available.) */
7673 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7674 tree type, int named)
7678 else if (TARGET_SH5)
7680 tree type2 = (ca->byref && type
7683 enum machine_mode mode2 = (ca->byref && type
7686 int dwords = ((ca->byref
7689 ? int_size_in_bytes (type2)
7690 : GET_MODE_SIZE (mode2)) + 7) / 8;
7691 int numregs = MIN (dwords, NPARM_REGS (SImode)
7692 - ca->arg_count[(int) SH_ARG_INT]);
7696 ca->arg_count[(int) SH_ARG_INT] += numregs;
7697 if (TARGET_SHCOMPACT
7698 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7701 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7703 /* N.B. We want this also for outgoing. */
7704 ca->stack_regs += numregs;
7709 ca->stack_regs += numregs;
7710 ca->byref_regs += numregs;
7714 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7718 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7721 else if (dwords > numregs)
7723 int pushregs = numregs;
7725 if (TARGET_SHCOMPACT)
7726 ca->stack_regs += numregs;
7727 while (pushregs < NPARM_REGS (SImode) - 1
7728 && (CALL_COOKIE_INT_REG_GET
7730 NPARM_REGS (SImode) - pushregs)
7734 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7738 if (numregs == NPARM_REGS (SImode))
7740 |= CALL_COOKIE_INT_REG (0, 1)
7741 | CALL_COOKIE_STACKSEQ (numregs - 1);
7744 |= CALL_COOKIE_STACKSEQ (numregs);
7747 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7748 && (named || ! ca->prototype_p))
7750 if (mode2 == SFmode && ca->free_single_fp_reg)
7751 ca->free_single_fp_reg = 0;
7752 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7753 < NPARM_REGS (SFmode))
7756 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7758 - ca->arg_count[(int) SH_ARG_FLOAT]);
7760 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7762 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7764 if (ca->outgoing && numregs > 0)
7768 |= (CALL_COOKIE_INT_REG
7769 (ca->arg_count[(int) SH_ARG_INT]
7770 - numregs + ((numfpregs - 2) / 2),
7771 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7774 while (numfpregs -= 2);
7776 else if (mode2 == SFmode && (named)
7777 && (ca->arg_count[(int) SH_ARG_FLOAT]
7778 < NPARM_REGS (SFmode)))
7779 ca->free_single_fp_reg
7780 = FIRST_FP_PARM_REG - numfpregs
7781 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7787 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7789 /* Note that we've used the skipped register. */
7790 if (mode == SFmode && ca->free_single_fp_reg)
7792 ca->free_single_fp_reg = 0;
7795 /* When we have a DF after an SF, there's an SF register that gets
7796 skipped in order to align the DF value. We note this skipped
7797 register, because the next SF value will use it, and not the
7798 SF that follows the DF. */
7800 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7802 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7803 + BASE_ARG_REG (mode));
7807 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7808 || PASS_IN_REG_P (*ca, mode, type))
7809 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7810 = (ROUND_REG (*ca, mode)
7812 ? ROUND_ADVANCE (int_size_in_bytes (type))
7813 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7816 /* The Renesas calling convention doesn't quite fit into this scheme since
7817 the address is passed like an invisible argument, but one that is always
7818 passed in memory. */
7820 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7822 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7824 return gen_rtx_REG (Pmode, 2);
7827 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7830 sh_return_in_memory (const_tree type, const_tree fndecl)
7834 if (TYPE_MODE (type) == BLKmode)
7835 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7837 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7841 return (TYPE_MODE (type) == BLKmode
7842 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7843 && TREE_CODE (type) == RECORD_TYPE));
7847 /* We actually emit the code in sh_expand_prologue. We used to use
7848 a static variable to flag that we need to emit this code, but that
7849 doesn't work when inlining, when functions are deferred and then emitted
7850 later. Fortunately, we already have two flags that are part of struct
7851 function that tell if a function uses varargs or stdarg. */
7853 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7854 enum machine_mode mode,
7856 int *pretend_arg_size,
7857 int second_time ATTRIBUTE_UNUSED)
7859 gcc_assert (current_function_stdarg);
7860 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7862 int named_parm_regs, anon_parm_regs;
7864 named_parm_regs = (ROUND_REG (*ca, mode)
7866 ? ROUND_ADVANCE (int_size_in_bytes (type))
7867 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7868 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7869 if (anon_parm_regs > 0)
7870 *pretend_arg_size = anon_parm_regs * 4;
7875 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7881 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7883 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7887 /* Define the offset between two registers, one to be eliminated, and
7888 the other its replacement, at the start of a routine. */
7891 initial_elimination_offset (int from, int to)
7894 int regs_saved_rounding = 0;
7895 int total_saved_regs_space;
7896 int total_auto_space;
7897 int save_flags = target_flags;
7899 HARD_REG_SET live_regs_mask;
7901 shmedia_space_reserved_for_target_registers = false;
7902 regs_saved = calc_live_regs (&live_regs_mask);
7903 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7905 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7907 shmedia_space_reserved_for_target_registers = true;
7908 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7911 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7912 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7913 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7915 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7916 copy_flags = target_flags;
7917 target_flags = save_flags;
7919 total_saved_regs_space = regs_saved + regs_saved_rounding;
7921 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7922 return total_saved_regs_space + total_auto_space
7923 + crtl->args.info.byref_regs * 8;
7925 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7926 return total_saved_regs_space + total_auto_space
7927 + crtl->args.info.byref_regs * 8;
7929 /* Initial gap between fp and sp is 0. */
7930 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7933 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7934 return rounded_frame_size (0);
7936 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7937 return rounded_frame_size (0);
7939 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7940 && (to == HARD_FRAME_POINTER_REGNUM
7941 || to == STACK_POINTER_REGNUM));
7944 int n = total_saved_regs_space;
7945 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7946 save_schedule schedule;
7949 n += total_auto_space;
7951 /* If it wasn't saved, there's not much we can do. */
7952 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7955 target_flags = copy_flags;
7957 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7958 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7959 if (entry->reg == pr_reg)
7961 target_flags = save_flags;
7962 return entry->offset;
7967 return total_auto_space;
7970 /* Insert any deferred function attributes from earlier pragmas. */
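/* E.g. a preceding "#pragma interrupt" leaves interrupt_handler in
   sh_deferred_function_attributes, and the next function declaration
   processed here picks it up.  */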
7972 sh_insert_attributes (tree node, tree *attributes)
7976 if (TREE_CODE (node) != FUNCTION_DECL)
7979 /* We are only interested in fields. */
7983 /* Append the attributes to the deferred attributes. */
7984 *sh_deferred_function_attributes_tail = *attributes;
7985 attrs = sh_deferred_function_attributes;
7989 /* Some attributes imply or require the interrupt attribute. */
7990 if (!lookup_attribute ("interrupt_handler", attrs)
7991 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7993 /* If we have a trapa_handler, but no interrupt_handler attribute,
7994 insert an interrupt_handler attribute. */
7995 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7996 /* We can't use sh_pr_interrupt here because that's not in the java frontend. */
7999 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8000 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8001 if the interrupt attribute is missing, we ignore the attribute and warn. */
8003 else if (lookup_attribute ("sp_switch", attrs)
8004 || lookup_attribute ("trap_exit", attrs)
8005 || lookup_attribute ("nosave_low_regs", attrs)
8006 || lookup_attribute ("resbank", attrs))
8010 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8012 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8013 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8014 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8015 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8016 warning (OPT_Wattributes,
8017 "%qs attribute only applies to interrupt functions",
8018 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
8021 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8023 tail = &TREE_CHAIN (*tail);
8026 attrs = *attributes;
8030 /* Install the processed list. */
8031 *attributes = attrs;
8033 /* Clear deferred attributes. */
8034 sh_deferred_function_attributes = NULL_TREE;
8035 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8040 /* Supported attributes:
8042 interrupt_handler -- specifies this function is an interrupt handler.
8044 trapa_handler -- like above, but don't save all registers.
8046 sp_switch -- specifies an alternate stack for an interrupt handler to run on.
8049 trap_exit -- use a trapa to exit an interrupt function instead of an rte instruction.
8052 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8053 This is useful on the SH3 and upwards,
8054 which have a separate set of low regs for User and Supervisor modes.
8055 This should only be used for the lowest level of interrupts. Higher levels
8056 of interrupts must save the registers in case they themselves are interrupted.
8059 renesas -- use Renesas calling/layout conventions (functions and structures).
8062 resbank -- in case of an ISR, use a register bank to save registers
8063 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets. */
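/* Illustrative usage (a sketch; "alt_stack" and the vector number 11
   are made-up placeholders):

     char *alt_stack;
     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));
*/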
8066 const struct attribute_spec sh_attribute_table[] =
8068 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
8069 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8070 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
8071 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
8072 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
8073 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8074 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
8075 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
8076 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
8078 /* Symbian support adds two new attributes:
8079 dllexport - for exporting a function/variable that will live in a dll
8080 dllimport - for importing a function/variable from a dll
8082 Microsoft allows multiple declspecs in one __declspec, separating
8083 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
8085 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8086 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8088 { NULL, 0, 0, false, false, false, NULL }
8091 /* Handle a 'resbank' attribute. */
8093 sh_handle_resbank_handler_attribute (tree * node, tree name,
8094 tree args ATTRIBUTE_UNUSED,
8095 int flags ATTRIBUTE_UNUSED,
8096 bool * no_add_attrs)
8100 warning (OPT_Wattributes, "%qs attribute is supported only for SH2A",
8101 IDENTIFIER_POINTER (name));
8102 *no_add_attrs = true;
8104 if (TREE_CODE (*node) != FUNCTION_DECL)
8106 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8107 IDENTIFIER_POINTER (name));
8108 *no_add_attrs = true;
8114 /* Handle an "interrupt_handler" attribute; arguments as in
8115 struct attribute_spec.handler. */
8117 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8118 tree args ATTRIBUTE_UNUSED,
8119 int flags ATTRIBUTE_UNUSED,
8122 if (TREE_CODE (*node) != FUNCTION_DECL)
8124 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8125 IDENTIFIER_POINTER (name));
8126 *no_add_attrs = true;
8128 else if (TARGET_SHCOMPACT)
8130 error ("attribute interrupt_handler is not compatible with -m5-compact");
8131 *no_add_attrs = true;
8137 /* Handle an 'function_vector' attribute; arguments as in
8138 struct attribute_spec.handler. */
8140 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8141 tree args ATTRIBUTE_UNUSED,
8142 int flags ATTRIBUTE_UNUSED,
8143 bool * no_add_attrs)
8147 warning (OPT_Wattributes, "%qs attribute only applies to SH2A",
8148 IDENTIFIER_POINTER (name));
8149 *no_add_attrs = true;
8151 else if (TREE_CODE (*node) != FUNCTION_DECL)
8153 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8154 IDENTIFIER_POINTER (name));
8155 *no_add_attrs = true;
8157 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8159 /* The argument must be a constant integer. */
8160 warning (OPT_Wattributes,
8161 "`%s' attribute argument not an integer constant",
8162 IDENTIFIER_POINTER (name));
8163 *no_add_attrs = true;
8165 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8167 /* The argument value must be between 0 and 255. */
8168 warning (OPT_Wattributes,
8169 "%qs attribute argument should be between 0 and 255",
8170 IDENTIFIER_POINTER (name));
8171 *no_add_attrs = true;
8176 /* Returns 1 if the rtx X is a SYMBOL_REF for a function that has been
8177 assigned the attribute 'function_vector'. */
8179 sh2a_is_function_vector_call (rtx x)
8181 if (GET_CODE (x) == SYMBOL_REF
8182 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8184 tree tr = SYMBOL_REF_DECL (x);
8186 if (sh2a_function_vector_p (tr))
8193 /* Returns the function vector number, if the attribute
8194 'function_vector' is assigned, otherwise returns zero. */
8196 sh2a_get_function_vector_number (rtx x)
8201 if ((GET_CODE (x) == SYMBOL_REF)
8202 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8204 t = SYMBOL_REF_DECL (x);
8206 if (TREE_CODE (t) != FUNCTION_DECL)
8209 list = SH_ATTRIBUTES (t);
8212 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8214 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8218 list = TREE_CHAIN (list);
8227 /* Handle an "sp_switch" attribute; arguments as in
8228 struct attribute_spec.handler. */
8230 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8231 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8233 if (TREE_CODE (*node) != FUNCTION_DECL)
8235 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8236 IDENTIFIER_POINTER (name));
8237 *no_add_attrs = true;
8239 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8241 /* The argument must be a constant string. */
8242 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8243 IDENTIFIER_POINTER (name));
8244 *no_add_attrs = true;
8250 /* Handle an "trap_exit" attribute; arguments as in
8251 struct attribute_spec.handler. */
8253 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8254 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8256 if (TREE_CODE (*node) != FUNCTION_DECL)
8258 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8259 IDENTIFIER_POINTER (name));
8260 *no_add_attrs = true;
8262 /* The argument specifies a trap number to be used in a trapa instruction
8263 at function exit (instead of an rte instruction). */
8264 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8266 /* The argument must be a constant integer. */
8267 warning (OPT_Wattributes, "%qs attribute argument not an "
8268 "integer constant", IDENTIFIER_POINTER (name));
8269 *no_add_attrs = true;
8276 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8277 tree name ATTRIBUTE_UNUSED,
8278 tree args ATTRIBUTE_UNUSED,
8279 int flags ATTRIBUTE_UNUSED,
8280 bool *no_add_attrs ATTRIBUTE_UNUSED)
8285 /* True if __attribute__((renesas)) or -mrenesas. */
8287 sh_attr_renesas_p (const_tree td)
8294 td = TREE_TYPE (td);
8295 if (td == error_mark_node)
8297 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8301 /* True if __attribute__((renesas)) or -mrenesas, for the current
8304 sh_cfun_attr_renesas_p (void)
8306 return sh_attr_renesas_p (current_function_decl);
8310 sh_cfun_interrupt_handler_p (void)
8312 return (lookup_attribute ("interrupt_handler",
8313 DECL_ATTRIBUTES (current_function_decl))
8317 /* Returns 1 if FUNC has been assigned the attribute
8318 "function_vector". */
8320 sh2a_function_vector_p (tree func)
8323 if (TREE_CODE (func) != FUNCTION_DECL)
8326 list = SH_ATTRIBUTES (func);
8329 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8332 list = TREE_CHAIN (list);
8337 /* Returns TRUE if given tree has the "resbank" attribute. */
8340 sh_cfun_resbank_handler_p (void)
8342 return ((lookup_attribute ("resbank",
8343 DECL_ATTRIBUTES (current_function_decl))
8345 && (lookup_attribute ("interrupt_handler",
8346 DECL_ATTRIBUTES (current_function_decl))
8347 != NULL_TREE) && TARGET_SH2A);
8350 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8353 sh_check_pch_target_flags (int old_flags)
8355 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8356 | MASK_SH_E | MASK_HARD_SH4
8357 | MASK_FPU_SINGLE | MASK_SH4))
8358 return _("created and used with different architectures / ABIs");
8359 if ((old_flags ^ target_flags) & MASK_HITACHI)
8360 return _("created and used with different ABIs");
8361 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8362 return _("created and used with different endianness");
8366 /* Predicates used by the templates. */
8368 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8369 Used only in general_movsrc_operand. */
8372 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8384 /* Nonzero if OP is a floating point value with value 0.0. */
8387 fp_zero_operand (rtx op)
8391 if (GET_MODE (op) != SFmode)
8394 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8395 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8398 /* Nonzero if OP is a floating point value with value 1.0. */
8401 fp_one_operand (rtx op)
8405 if (GET_MODE (op) != SFmode)
8408 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8409 return REAL_VALUES_EQUAL (r, dconst1);
8412 /* For -m4 and -m4-single-only, mode switching is used. If we are
8413 compiling without -mfmovd, movsf_ie isn't taken into account for
8414 mode switching. We could check in machine_dependent_reorg for
8415 cases where we know we are in single precision mode, but there is
8416 no interface to find that out during reload, so we must avoid
8417 choosing an fldi alternative during reload and thus failing to
8418 allocate a scratch register for the constant loading. */
8422 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8426 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8428 enum rtx_code code = GET_CODE (op);
8429 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8432 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8434 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8436 if (GET_CODE (op) != SYMBOL_REF)
8438 return SYMBOL_REF_TLS_MODEL (op);
8441 /* Return the destination address of a branch. */
8444 branch_dest (rtx branch)
8446 rtx dest = SET_SRC (PATTERN (branch));
8449 if (GET_CODE (dest) == IF_THEN_ELSE)
8450 dest = XEXP (dest, 1);
8451 dest = XEXP (dest, 0);
8452 dest_uid = INSN_UID (dest);
8453 return INSN_ADDRESSES (dest_uid);
8456 /* Return nonzero if REG is not used after INSN.
8457 We assume REG is a reload reg, and therefore does
8458 not live past labels. It may live past calls or jumps though. */
8460 reg_unused_after (rtx reg, rtx insn)
8465 /* If the reg is set by this instruction, then it is safe for our
8466 case. Disregard the case where this is a store to memory, since
8467 we are checking a register used in the store address. */
8468 set = single_set (insn);
8469 if (set && GET_CODE (SET_DEST (set)) != MEM
8470 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8473 while ((insn = NEXT_INSN (insn)))
8479 code = GET_CODE (insn);
8482 /* If this is a label that existed before reload, then the register
8483 is dead here. However, if this is a label added by reorg, then
8484 the register may still be live here. We can't tell the difference,
8485 so we just ignore labels completely. */
8486 if (code == CODE_LABEL)
8491 if (code == JUMP_INSN)
8494 /* If this is a sequence, we must handle them all at once.
8495 We could have for instance a call that sets the target register,
8496 and an insn in a delay slot that uses the register. In this case,
8497 we must return 0. */
8498 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8503 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8505 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8506 rtx set = single_set (this_insn);
8508 if (GET_CODE (this_insn) == CALL_INSN)
8510 else if (GET_CODE (this_insn) == JUMP_INSN)
8512 if (INSN_ANNULLED_BRANCH_P (this_insn))
8517 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8519 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8521 if (GET_CODE (SET_DEST (set)) != MEM)
8527 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8532 else if (code == JUMP_INSN)
8536 set = single_set (insn);
8537 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8539 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8540 return GET_CODE (SET_DEST (set)) != MEM;
8541 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8544 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8552 static GTY(()) rtx fpscr_rtx;
8554 get_fpscr_rtx (void)
8558 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8559 REG_USERVAR_P (fpscr_rtx) = 1;
8560 mark_user_reg (fpscr_rtx);
8562 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8563 mark_user_reg (fpscr_rtx);
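/* __fpscr_values is a two-element array provided by the runtime
   (libgcc) holding the FPSCR settings for the two FP precision modes;
   emit_fpu_switch below loads the element selected by INDEX into
   FPSCR.  */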
8567 static GTY(()) tree fpscr_values;
8570 emit_fpu_switch (rtx scratch, int index)
8574 if (fpscr_values == NULL)
8578 t = build_index_type (integer_one_node);
8579 t = build_array_type (integer_type_node, t);
8580 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8581 DECL_ARTIFICIAL (t) = 1;
8582 DECL_IGNORED_P (t) = 1;
8583 DECL_EXTERNAL (t) = 1;
8584 TREE_STATIC (t) = 1;
8585 TREE_PUBLIC (t) = 1;
8591 src = DECL_RTL (fpscr_values);
8592 if (!can_create_pseudo_p ())
8594 emit_move_insn (scratch, XEXP (src, 0));
8596 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8597 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8600 src = adjust_address (src, PSImode, index * 4);
8602 dst = get_fpscr_rtx ();
8603 emit_move_insn (dst, src);
8607 emit_sf_insn (rtx pat)
8613 emit_df_insn (rtx pat)
8619 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8621 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8625 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8627 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8632 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8634 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8638 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8640 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8644 static rtx get_free_reg (HARD_REG_SET);
8646 /* This function returns a register to use to load the address from
8647 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8648 able to use pseudo registers after combine, or have a better mechanism
8649 for choosing a register, it should be done here. */
8650 /* REGS_LIVE is the liveness information for the point for which we
8651 need this allocation. In some bare-bones exit blocks, r1 is live at the
8652 start. We can even have all of r0..r3 being live:
8653 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8654 The INSN before which the new insns are placed will clobber the register
8655 we return. If a basic block consists only of setting the return value
8656 register to a pseudo and using that register, the return value is not
8657 live before or after this block, yet we'll insert our insns right in the middle. */
8661 get_free_reg (HARD_REG_SET regs_live)
8663 if (! TEST_HARD_REG_BIT (regs_live, 1))
8664 return gen_rtx_REG (Pmode, 1);
8666 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8667 there shouldn't be anything but a jump before the function end. */
8668 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8669 return gen_rtx_REG (Pmode, 7);
8672 /* This function will set the fpscr from memory.
8673 MODE is the mode we are setting it to. */
8675 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8677 enum attr_fp_mode fp_mode = mode;
8678 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8681 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8682 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8685 /* Is the given character a logical line separator for the assembler? */
8686 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8687 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
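/* E.g. with the default above, "padd x0,y0,a0 ; pcopy x1,a1" inside an
   asm template counts as two logical insns.  */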
8691 sh_insn_length_adjustment (rtx insn)
8693 /* Instructions with unfilled delay slots take up an extra two bytes for
8694 the nop in the delay slot. */
8695 if (((GET_CODE (insn) == INSN
8696 && GET_CODE (PATTERN (insn)) != USE
8697 && GET_CODE (PATTERN (insn)) != CLOBBER)
8698 || GET_CODE (insn) == CALL_INSN
8699 || (GET_CODE (insn) == JUMP_INSN
8700 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8701 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8702 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8703 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8706 /* SH2e has a bug that prevents the use of annulled branches, so if
8707 the delay slot is not filled, we'll have to put a NOP in it. */
8708 if (sh_cpu == CPU_SH2E
8709 && GET_CODE (insn) == JUMP_INSN
8710 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8711 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8712 && get_attr_type (insn) == TYPE_CBRANCH
8713 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8716 /* sh-dsp parallel processing insns take four bytes instead of two. */
8718 if (GET_CODE (insn) == INSN)
8721 rtx body = PATTERN (insn);
8722 const char *template;
8724 int maybe_label = 1;
8726 if (GET_CODE (body) == ASM_INPUT)
8727 template = XSTR (body, 0);
8728 else if (asm_noperands (body) >= 0)
8730 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8739 while (c == ' ' || c == '\t');
8740 /* All sh-dsp parallel-processing insns start with p.
8741 The only non-ppi sh insn starting with p is pref.
8742 The only ppi starting with pr is prnd. */
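/* (E.g. "padd x0,y0,a0" is a 4-byte ppi insn, whereas "pref @r1" is an
   ordinary 2-byte insn; "prnd" is the one ppi mnemonic starting with
   "pr".)  */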
8743 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8745 /* The repeat pseudo-insn expands to three insns, a total of
8746 six bytes in size. */
8747 else if ((c == 'r' || c == 'R')
8748 && ! strncasecmp ("epeat", template, 5))
8750 while (c && c != '\n'
8751 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8753 /* If this is a label, it is obviously not a ppi insn. */
8754 if (c == ':' && maybe_label)
8759 else if (c == '\'' || c == '"')
8764 maybe_label = c != ':';
8772 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8773 isn't protected by a PIC unspec. */
8775 nonpic_symbol_mentioned_p (rtx x)
8777 register const char *fmt;
8780 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8781 || GET_CODE (x) == PC)
8784 /* We don't want to look into the possible MEM location of a
8785 CONST_DOUBLE, since we're not going to use it, in general. */
8786 if (GET_CODE (x) == CONST_DOUBLE)
8789 if (GET_CODE (x) == UNSPEC
8790 && (XINT (x, 1) == UNSPEC_PIC
8791 || XINT (x, 1) == UNSPEC_GOT
8792 || XINT (x, 1) == UNSPEC_GOTOFF
8793 || XINT (x, 1) == UNSPEC_GOTPLT
8794 || XINT (x, 1) == UNSPEC_GOTTPOFF
8795 || XINT (x, 1) == UNSPEC_DTPOFF
8796 || XINT (x, 1) == UNSPEC_PLT))
8799 fmt = GET_RTX_FORMAT (GET_CODE (x));
8800 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8806 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8807 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8810 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8817 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8818 @GOTOFF in `reg'. */
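/* Roughly: a local symbol or label S is computed relative to the PIC
   register (r12) as S@GOTOFF + r12, while a global symbol is loaded
   through its GOT slot at r12 + S@GOT; the symGOTOFF2reg / symGOT2reg
   expanders used below emit those sequences.  */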
8820 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8823 if (tls_symbolic_operand (orig, Pmode))
8826 if (GET_CODE (orig) == LABEL_REF
8827 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8830 reg = gen_reg_rtx (Pmode);
8832 emit_insn (gen_symGOTOFF2reg (reg, orig));
8835 else if (GET_CODE (orig) == SYMBOL_REF)
8838 reg = gen_reg_rtx (Pmode);
8840 emit_insn (gen_symGOT2reg (reg, orig));
8846 /* Mark the use of a constant in the literal table. If the constant
8847 has multiple labels, make it unique. */
8849 mark_constant_pool_use (rtx x)
8851 rtx insn, lab, pattern;
8856 switch (GET_CODE (x))
8866 /* Get the first label in the list of labels for the same constant
8867 and delete the other labels in the list. */
8869 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8871 if (GET_CODE (insn) != CODE_LABEL
8872 || LABEL_REFS (insn) != NEXT_INSN (insn))
8877 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8878 INSN_DELETED_P (insn) = 1;
8880 /* Mark constants in a window. */
8881 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8883 if (GET_CODE (insn) != INSN)
8886 pattern = PATTERN (insn);
8887 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8890 switch (XINT (pattern, 1))
8892 case UNSPECV_CONST2:
8893 case UNSPECV_CONST4:
8894 case UNSPECV_CONST8:
8895 XVECEXP (pattern, 0, 1) = const1_rtx;
8897 case UNSPECV_WINDOW_END:
8898 if (XVECEXP (pattern, 0, 0) == x)
8901 case UNSPECV_CONST_END:
8911 /* Return true if it's possible to redirect BRANCH1 to the destination
8912 of an unconditional jump BRANCH2. We only want to do this if the
8913 resulting branch will have a short displacement. */
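/* "Short displacement" here means within the roughly 256-byte reach of
   the two-byte SH conditional branch insns (8-bit displacement, scaled
   by 2), which is why the scans below stop once DISTANCE reaches 256.  */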
8915 sh_can_redirect_branch (rtx branch1, rtx branch2)
8917 if (flag_expensive_optimizations && simplejump_p (branch2))
8919 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8923 for (distance = 0, insn = NEXT_INSN (branch1);
8924 insn && distance < 256;
8925 insn = PREV_INSN (insn))
8930 distance += get_attr_length (insn);
8932 for (distance = 0, insn = NEXT_INSN (branch1);
8933 insn && distance < 256;
8934 insn = NEXT_INSN (insn))
8939 distance += get_attr_length (insn);
8945 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8947 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8948 unsigned int new_reg)
8950 /* Interrupt functions can only use registers that have already been
8951 saved by the prologue, even if they would normally be
8952 call-clobbered. */
8954 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8960 /* Function to update the integer COST
8961 based on the relationship between INSN that is dependent on
8962 DEP_INSN through the dependence LINK. The default is to make no
8963 adjustment to COST. This can be used for example to specify to
8964 the scheduler that an output- or anti-dependence does not incur
8965 the same cost as a data-dependence. The return value should be
8966 the new value for COST. */
8968 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8974 /* On SHmedia, if the dependence is an anti-dependence or
8975 output-dependence, there is no cost. */
8976 if (REG_NOTE_KIND (link) != 0)
8978 /* However, dependencies between target register loads and
8979 uses of the register in a subsequent block that are separated
8980 by a conditional branch are not modelled; we have to make do with
8981 the anti-dependency between the target register load and the
8982 conditional branch that ends the current block. */
8983 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8984 && GET_CODE (PATTERN (dep_insn)) == SET
8985 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8986 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8987 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8989 int orig_cost = cost;
8990 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8991 rtx target = ((! note
8992 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8993 ? insn : JUMP_LABEL (insn));
8994 /* On the likely path, the branch costs 1, on the unlikely path,
8995 it costs 3. */
8998 target = next_active_insn (target);
8999 while (target && ! flow_dependent_p (target, dep_insn)
9001 /* If two branches are executed in immediate succession, with the
9002 first branch properly predicted, this causes a stall at the
9003 second branch, hence we won't need the target for the
9004 second branch for two cycles after the launch of the first
9006 if (cost > orig_cost - 2)
9007 cost = orig_cost - 2;
9013 else if (get_attr_is_mac_media (insn)
9014 && get_attr_is_mac_media (dep_insn))
9017 else if (! reload_completed
9018 && GET_CODE (PATTERN (insn)) == SET
9019 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9020 && GET_CODE (PATTERN (dep_insn)) == SET
9021 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9024 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9025 that is needed at the target. */
9026 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9027 && ! flow_dependent_p (insn, dep_insn))
9030 else if (REG_NOTE_KIND (link) == 0)
9032 enum attr_type type;
9035 if (recog_memoized (insn) < 0
9036 || recog_memoized (dep_insn) < 0)
9039 dep_set = single_set (dep_insn);
9041 /* The latency that we specify in the scheduling description refers
9042 to the actual output, not to an auto-increment register; for that,
9043 the latency is one. */
9044 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9046 rtx set = single_set (insn);
9049 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9050 && (!MEM_P (SET_DEST (set))
9051 || !reg_mentioned_p (SET_DEST (dep_set),
9052 XEXP (SET_DEST (set), 0))))
9055 /* The only input for a call that is timing-critical is the
9056 function's address. */
9057 if (GET_CODE (insn) == CALL_INSN)
9059 rtx call = PATTERN (insn);
9061 if (GET_CODE (call) == PARALLEL)
9062 call = XVECEXP (call, 0, 0);
9063 if (GET_CODE (call) == SET)
9064 call = SET_SRC (call);
9065 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9066 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9067 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9068 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9069 cost -= TARGET_SH4_300 ? 3 : 6;
9071 /* Likewise, the most timing critical input for an sfunc call
9072 is the function address. However, sfuncs typically start
9073 using their arguments pretty quickly.
9074 Assume a four cycle delay for SH4 before they are needed.
9075 Cached ST40-300 calls are quicker, so assume only a one
9076 cycle delay there.
9077 ??? Maybe we should encode the delays till input registers
9078 are needed by sfuncs into the sfunc call insn. */
9079 /* All sfunc calls are parallels with at least four components.
9080 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9081 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9082 && XVECLEN (PATTERN (insn), 0) >= 4
9083 && (reg = sfunc_uses_reg (insn)))
9085 if (! reg_set_p (reg, dep_insn))
9086 cost -= TARGET_SH4_300 ? 1 : 4;
9088 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9090 enum attr_type dep_type = get_attr_type (dep_insn);
9092 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9094 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9095 && (type = get_attr_type (insn)) != TYPE_CALL
9096 && type != TYPE_SFUNC)
9098 /* When the preceding instruction loads the shift amount of
9099 the following SHAD/SHLD, the latency of the load is increased
9100 by 1 cycle. */
9101 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9102 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9103 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9104 XEXP (SET_SRC (single_set (insn)),
9107 /* When an LS group instruction with a latency of less than
9108 3 cycles is followed by a double-precision floating-point
9109 instruction, FIPR, or FTRV, the latency of the first
9110 instruction is increased to 3 cycles. */
9112 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9113 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9115 /* The lsw register of a double-precision computation is ready one
9116 cycle earlier. */
9117 else if (reload_completed
9118 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9119 && (use_pat = single_set (insn))
9120 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9124 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9125 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9128 else if (TARGET_SH4_300)
9130 /* Stores need their input register two cycles later. */
9131 if (dep_set && cost >= 1
9132 && ((type = get_attr_type (insn)) == TYPE_STORE
9133 || type == TYPE_PSTORE
9134 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9136 rtx set = single_set (insn);
9138 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9139 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9142 /* But don't reduce the cost below 1 if the address depends
9143 on a side effect of dep_insn. */
9145 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9151 /* An anti-dependence penalty of two applies if the first insn is a double
9152 precision fadd / fsub / fmul. */
9153 else if (!TARGET_SH4_300
9154 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9155 && recog_memoized (dep_insn) >= 0
9156 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9157 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9158 /* A lot of alleged anti-flow dependences are fake,
9159 so check this one is real. */
9160 && flow_dependent_p (dep_insn, insn))
9166 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9167 if DEP_INSN is anti-flow dependent on INSN. */
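/* The check walks the stores of DEP_INSN via note_stores;
   flow_dependent_p_1 clears the shared pointer as soon as a stored
   location is referenced by INSN's pattern, so a null pointer on return
   means a dependence was found.  */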
9169 flow_dependent_p (rtx insn, rtx dep_insn)
9171 rtx tmp = PATTERN (insn);
9173 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9174 return tmp == NULL_RTX;
9177 /* A helper function for flow_dependent_p called through note_stores. */
9179 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9181 rtx * pinsn = (rtx *) data;
9183 if (*pinsn && reg_referenced_p (x, *pinsn))
9187 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9188 'special function' patterns (type sfunc) that clobber pr, but that
9189 do not look like function calls to leaf_function_p. Hence we must
9190 do this extra check. */
9194 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9197 /* Return where to allocate pseudo for a given hard register initial
9198 value. */
9200 sh_allocate_initial_value (rtx hard_reg)
9204 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9206 if (current_function_is_leaf
9207 && ! sh_pr_n_sets ()
9208 && ! (TARGET_SHCOMPACT
9209 && ((crtl->args.info.call_cookie
9210 & ~ CALL_COOKIE_RET_TRAMP (1))
9211 || current_function_saves_all_registers)))
9214 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9222 /* This function returns "2" to indicate dual issue for the SH4
9223 processor. To be used by the DFA pipeline description. */
9225 sh_issue_rate (void)
9227 if (TARGET_SUPERSCALAR)
9233 /* Functions for ready queue reordering for sched1. */
9235 /* Get the weight in mode MODE for a set X. */
9237 find_set_regmode_weight (rtx x, enum machine_mode mode)
9239 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9241 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9243 if (GET_CODE (SET_DEST (x)) == REG)
9245 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9255 /* Get regmode weight for insn. */
9257 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9259 short reg_weight = 0;
9262 /* Increment weight for each register born here. */
9264 reg_weight += find_set_regmode_weight (x, mode);
9265 if (GET_CODE (x) == PARALLEL)
9268 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9270 x = XVECEXP (PATTERN (insn), 0, j);
9271 reg_weight += find_set_regmode_weight (x, mode);
9274 /* Decrement weight for each register that dies here. */
9275 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9277 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9279 rtx note = XEXP (x, 0);
9280 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9287 /* Calculate regmode weights for all insns of a basic block. */
9289 find_regmode_weight (basic_block b, enum machine_mode mode)
9291 rtx insn, next_tail, head, tail;
9293 get_ebb_head_tail (b, b, &head, &tail);
9294 next_tail = NEXT_INSN (tail);
9296 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9298 /* Handle register life information. */
9303 INSN_REGMODE_WEIGHT (insn, mode) =
9304 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9305 else if (mode == SImode)
9306 INSN_REGMODE_WEIGHT (insn, mode) =
9307 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
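/* A DFmode (resp. DImode) value occupies two SFmode (resp. SImode)
   registers, hence the doubled weight added above.  */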
9311 /* Comparison function for ready queue sorting. */
9313 rank_for_reorder (const void *x, const void *y)
9315 rtx tmp = *(const rtx *) y;
9316 rtx tmp2 = *(const rtx *) x;
9318 /* The insn in a schedule group should be issued first. */
9319 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9320 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9322 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9323 minimizes instruction movement, thus minimizing sched's effect on
9324 register pressure. */
9325 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9328 /* Resort the array A, in which only the element at index N - 1 may be out of order. */
9330 swap_reorder (rtx *a, int n)
9332 rtx insn = a[n - 1];
9335 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9343 #define SCHED_REORDER(READY, N_READY) \
9346 if ((N_READY) == 2) \
9347 swap_reorder (READY, N_READY); \
9348 else if ((N_READY) > 2) \
9349 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9353 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9354 macro. */
9356 ready_reorder (rtx *ready, int nready)
9358 SCHED_REORDER (ready, nready);
9361 /* Count life regions of r0 for a block. */
9363 find_r0_life_regions (basic_block b)
9372 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9385 r0_reg = gen_rtx_REG (SImode, R0_REG);
9390 if (find_regno_note (insn, REG_DEAD, R0_REG))
9396 && (pset = single_set (insn))
9397 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9398 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9406 insn = NEXT_INSN (insn);
9411 /* Calculate regmode weights for all insns of all basic blocks. */
9413 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9414 int verbose ATTRIBUTE_UNUSED,
9419 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9420 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9421 r0_life_regions = 0;
9423 FOR_EACH_BB_REVERSE (b)
9425 find_regmode_weight (b, SImode);
9426 find_regmode_weight (b, SFmode);
9427 if (!reload_completed)
9428 r0_life_regions += find_r0_life_regions (b);
9431 CURR_REGMODE_PRESSURE (SImode) = 0;
9432 CURR_REGMODE_PRESSURE (SFmode) = 0;
9438 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9439 int verbose ATTRIBUTE_UNUSED)
9441 if (regmode_weight[0])
9443 free (regmode_weight[0]);
9444 regmode_weight[0] = NULL;
9446 if (regmode_weight[1])
9448 free (regmode_weight[1]);
9449 regmode_weight[1] = NULL;
9453 /* The set of scalar modes supported differs from the default in TImode
9454 for 32-bit SHMEDIA. */
9456 sh_scalar_mode_supported_p (enum machine_mode mode)
9458 if (TARGET_SHMEDIA32 && mode == TImode)
9461 return default_scalar_mode_supported_p (mode);
9464 /* Cache can_issue_more so that we can return it from sh_reorder2. Also,
9465 keep count of register pressure for SImode and SFmode. */
9467 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9468 int sched_verbose ATTRIBUTE_UNUSED,
9472 if (GET_CODE (PATTERN (insn)) != USE
9473 && GET_CODE (PATTERN (insn)) != CLOBBER)
9474 cached_can_issue_more = can_issue_more - 1;
9476 cached_can_issue_more = can_issue_more;
9478 if (reload_completed)
9479 return cached_can_issue_more;
9481 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9482 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9484 return cached_can_issue_more;
9488 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9489 int verbose ATTRIBUTE_UNUSED,
9490 int veclen ATTRIBUTE_UNUSED)
9492 CURR_REGMODE_PRESSURE (SImode) = 0;
9493 CURR_REGMODE_PRESSURE (SFmode) = 0;
9496 /* Some magic numbers. */
9497 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9498 functions that already have high pressure on r0. */
9499 #define R0_MAX_LIFE_REGIONS 2
9500 /* Register pressure thresholds for SImode and SFmode registers. */
9501 #define SIMODE_MAX_WEIGHT 5
9502 #define SFMODE_MAX_WEIGHT 10
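/* I.e. once more than SIMODE_MAX_WEIGHT SImode values or more than
   SFMODE_MAX_WEIGHT SFmode values are simultaneously live, high_pressure
   below reports true and sh_reorder / sh_reorder2 throttle the first
   scheduling pass.  */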
9504 /* Return true if the pressure is high for MODE. */
9506 high_pressure (enum machine_mode mode)
9508 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9509 functions that already have high pressure on r0. */
9510 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9514 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9516 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9519 /* Reorder ready queue if register pressure is high. */
9521 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9522 int sched_verbose ATTRIBUTE_UNUSED,
9525 int clock_var ATTRIBUTE_UNUSED)
9527 if (reload_completed)
9528 return sh_issue_rate ();
9530 if (high_pressure (SFmode) || high_pressure (SImode))
9532 ready_reorder (ready, *n_readyp);
9535 return sh_issue_rate ();
9538 /* Skip cycles if the current register pressure is high. */
9540 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9541 int sched_verbose ATTRIBUTE_UNUSED,
9542 rtx *ready ATTRIBUTE_UNUSED,
9543 int *n_readyp ATTRIBUTE_UNUSED,
9544 int clock_var ATTRIBUTE_UNUSED)
9546 if (reload_completed)
9547 return cached_can_issue_more;
9549 if (high_pressure (SFmode) || high_pressure (SImode))
9552 return cached_can_issue_more;
9555 /* Skip cycles without sorting the ready queue. This will move insns from
9556 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9557 queue by sh_reorder. */
9559 /* Generally, skipping this many cycles is sufficient for all insns to move
9560 from Q -> R. */
9561 #define MAX_SKIPS 8
9564 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9565 int sched_verbose ATTRIBUTE_UNUSED,
9566 rtx insn ATTRIBUTE_UNUSED,
9571 if (reload_completed)
9576 if ((clock_var - last_clock_var) < MAX_SKIPS)
9581 /* If this is the last cycle we are skipping, allow reordering of R. */
9582 if ((clock_var - last_clock_var) == MAX_SKIPS)
9594 /* SHmedia requires registers for branches, so we can't generate new
9595 branches past reload. */
9597 sh_cannot_modify_jumps_p (void)
9599 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9603 sh_target_reg_class (void)
9605 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9609 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9616 if (! shmedia_space_reserved_for_target_registers)
9618 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9620 if (calc_live_regs (&dummy) >= 6 * 8)
9626 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9628 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9632 On the SH1..SH4, the trampoline looks like
9633 2 0002 D202 mov.l l2,r2
9634 1 0000 D301 mov.l l1,r3
9635 3 0004 422B jmp @r2
9636 4 0006 0009 nop
9637 5 0008 00000000 l1: .long area
9638 6 000c 00000000 l2: .long function
9640 SH5 (compact) uses r1 instead of r3 for the static chain. */
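/* N.B. the 16-bit opcodes in the listing above (0xd202, 0xd301, 0x422b,
   0x0009) are exactly the words that sh_initialize_trampoline stores for
   the SH1..SH4 case below, paired into 32-bit constants in an
   endian-dependent order.  */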
9643 /* Emit RTL insns to initialize the variable parts of a trampoline.
9644 FNADDR is an RTX for the address of the function's pure code.
9645 CXT is an RTX for the static chain value for the function. */
9648 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9650 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9652 if (TARGET_SHMEDIA64)
9657 rtx movi1 = GEN_INT (0xcc000010);
9658 rtx shori1 = GEN_INT (0xc8000010);
9661 /* The following trampoline works within a +- 128 KB range for cxt:
9662 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9663 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9664 gettr tr1,r1; blink tr0,r63 */
9665 /* Address rounding makes it hard to compute the exact bounds of the
9666 offset for this trampoline, but we have a rather generous offset
9667 range, so frame_offset should do fine as an upper bound. */
9668 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9670 /* ??? could optimize this trampoline initialization
9671 by writing DImode words with two insns each. */
9672 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9673 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9674 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9675 insn = gen_rtx_AND (DImode, insn, mask);
9676 /* Or in the ptb/u .,tr1 pattern. */
9677 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9678 insn = force_operand (insn, NULL_RTX);
9679 insn = gen_lowpart (SImode, insn);
9680 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9681 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9682 insn = gen_rtx_AND (DImode, insn, mask);
9683 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9684 insn = gen_lowpart (SImode, insn);
9685 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9686 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9687 insn = gen_rtx_AND (DImode, insn, mask);
9688 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9689 insn = gen_lowpart (SImode, insn);
9690 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9691 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9692 insn = gen_rtx_AND (DImode, insn, mask);
9693 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9694 insn = gen_lowpart (SImode, insn);
9695 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9696 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9697 insn = gen_rtx_AND (DImode, insn, mask);
9698 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9699 insn = gen_lowpart (SImode, insn);
9700 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9701 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9702 GEN_INT (0x6bf10600));
9703 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9704 GEN_INT (0x4415fc10));
9705 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9706 GEN_INT (0x4401fff0));
9707 emit_insn (gen_ic_invalidate_line (tramp));
9710 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9711 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9713 tramp_templ = gen_datalabel_ref (tramp_templ);
9715 src = gen_const_mem (BLKmode, tramp_templ);
9716 set_mem_align (dst, 256);
9717 set_mem_align (src, 64);
9718 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9720 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9721 emit_move_insn (adjust_address (tramp_mem, Pmode,
9722 fixed_len + GET_MODE_SIZE (Pmode)),
9724 emit_insn (gen_ic_invalidate_line (tramp));
9727 else if (TARGET_SHMEDIA)
9729 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9730 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9731 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9732 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9733 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9734 rotated 10 right, and the higher 16 bits of every 32 selected. */
9736 = force_reg (V2HImode, (simplify_gen_subreg
9737 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9738 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9739 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9741 tramp = force_reg (Pmode, tramp);
9742 fnaddr = force_reg (SImode, fnaddr);
9743 cxt = force_reg (SImode, cxt);
9744 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9745 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9747 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9748 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9749 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9750 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9751 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9752 gen_rtx_SUBREG (V2HImode, cxt, 0),
9754 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9755 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9756 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9757 if (TARGET_LITTLE_ENDIAN)
9759 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9760 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9764 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9765 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9767 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9768 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9769 emit_insn (gen_ic_invalidate_line (tramp));
9772 else if (TARGET_SHCOMPACT)
9774 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9777 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9778 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9780 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9781 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9783 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9784 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9787 if (!TARGET_INLINE_IC_INVALIDATE
9788 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9789 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9791 0, VOIDmode, 1, tramp, SImode);
9793 emit_insn (gen_ic_invalidate_line (tramp));
9797 /* FIXME: This is overly conservative. A SHcompact function that
9798 receives arguments ``by reference'' will have them stored in its
9799 own stack frame, so it must not pass pointers or references to
9800 these arguments to other functions by means of sibling calls. */
9801 /* If PIC, we cannot make sibling calls to global functions
9802 because the PLT requires r12 to be live. */
9804 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9807 && (! TARGET_SHCOMPACT
9808 || crtl->args.info.stack_regs == 0)
9809 && ! sh_cfun_interrupt_handler_p ()
9811 || (decl && ! TREE_PUBLIC (decl))
9812 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9815 /* Machine specific built-in functions. */
9817 struct builtin_description
9819 const enum insn_code icode;
9820 const char *const name;
9824 /* Describes number and signedness of arguments; arg[0] == result
9825 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9826 /* 9: 64-bit pointer, 10: 32-bit pointer */
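/* For illustration (hypothetical rows, not necessarily present in the
   table): { 2, 2, 2, 0 } would describe a builtin with a signed result
   and two signed operands, while { 1, 8, 0, 0 } would describe one
   returning unsigned and taking a single pointer argument.  */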
9827 static const char signature_args[][4] =
9829 #define SH_BLTIN_V2SI2 0
9831 #define SH_BLTIN_V4HI2 1
9833 #define SH_BLTIN_V2SI3 2
9835 #define SH_BLTIN_V4HI3 3
9837 #define SH_BLTIN_V8QI3 4
9839 #define SH_BLTIN_MAC_HISI 5
9841 #define SH_BLTIN_SH_HI 6
9843 #define SH_BLTIN_SH_SI 7
9845 #define SH_BLTIN_V4HI2V2SI 8
9847 #define SH_BLTIN_V4HI2V8QI 9
9849 #define SH_BLTIN_SISF 10
9851 #define SH_BLTIN_LDUA_L 11
9853 #define SH_BLTIN_LDUA_Q 12
9855 #define SH_BLTIN_STUA_L 13
9857 #define SH_BLTIN_STUA_Q 14
9859 #define SH_BLTIN_LDUA_L64 15
9861 #define SH_BLTIN_LDUA_Q64 16
9863 #define SH_BLTIN_STUA_L64 17
9865 #define SH_BLTIN_STUA_Q64 18
9867 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9868 #define SH_BLTIN_2 19
9869 #define SH_BLTIN_SU 19
9871 #define SH_BLTIN_3 20
9872 #define SH_BLTIN_SUS 20
9874 #define SH_BLTIN_PSSV 21
9876 #define SH_BLTIN_XXUU 22
9877 #define SH_BLTIN_UUUU 22
9879 #define SH_BLTIN_PV 23
9882 /* mcmv: operands considered unsigned. */
9883 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9884 /* mperm: control value considered unsigned int. */
9885 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9886 /* mshards_q: returns signed short. */
9887 /* nsb: takes long long arg, returns unsigned char. */
9888 static const struct builtin_description bdesc[] =
9890 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9891 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9892 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9893 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9894 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9895 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9896 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9897 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9898 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9899 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9900 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9901 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9902 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9903 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9904 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9905 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9906 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9907 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9908 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9909 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9910 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9911 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9912 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9913 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9914 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9915 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9916 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9917 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9918 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9919 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9920 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9921 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9922 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9923 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9924 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9925 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9926 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9927 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9928 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9929 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9930 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9931 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9932 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9933 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9934 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9935 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9936 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9937 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9938 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9939 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9940 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9941 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9942 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9943 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9944 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9945 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9946 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9947 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9948 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9949 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9950 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9951 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9952 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9953 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9954 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9955 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9956 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9957 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9958 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9959 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9960 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9961 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9962 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9963 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9964 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9965 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9966 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9967 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9968 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9969 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9970 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9971 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9972 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9973 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9977 sh_media_init_builtins (void)
9979 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9980 const struct builtin_description *d;
9982 memset (shared, 0, sizeof shared);
9983 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9985 tree type, arg_type = 0;
9986 int signature = d->signature;
9989 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9990 type = shared[signature];
9993 int has_result = signature_args[signature][0] != 0;
9995 if ((signature_args[signature][1] & 8)
9996 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9997 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9999 if (! TARGET_FPU_ANY
10000 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10002 type = void_list_node;
10005 int arg = signature_args[signature][i];
10006 int opno = i - 1 + has_result;
10009 arg_type = ptr_type_node;
10011 arg_type = (*lang_hooks.types.type_for_mode)
10012 (insn_data[d->icode].operand[opno].mode,
10017 arg_type = void_type_node;
10020 type = tree_cons (NULL_TREE, arg_type, type);
10022 type = build_function_type (arg_type, type);
10023 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10024 shared[signature] = type;
10026 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10031 /* Implements target hook vector_mode_supported_p. */
10033 sh_vector_mode_supported_p (enum machine_mode mode)
10036 && ((mode == V2SFmode)
10037 || (mode == V4SFmode)
10038 || (mode == V16SFmode)))
10041 else if (TARGET_SHMEDIA
10042 && ((mode == V8QImode)
10043 || (mode == V2HImode)
10044 || (mode == V4HImode)
10045 || (mode == V2SImode)))
10051 /* Implements target hook dwarf_calling_convention. Return an enum
10052 dwarf_calling_convention value. */
10054 sh_dwarf_calling_convention (const_tree func)
10056 if (sh_attr_renesas_p (func))
10057 return DW_CC_GNU_renesas_sh;
10059 return DW_CC_normal;
10063 sh_init_builtins (void)
10065 if (TARGET_SHMEDIA)
10066 sh_media_init_builtins ();
10069 /* Expand an expression EXP that calls a built-in function,
10070 with result going to TARGET if that's convenient
10071 (and in mode MODE if that's convenient).
10072 SUBTARGET may be used as the target for computing one of EXP's operands.
10073 IGNORE is nonzero if the value is to be ignored. */
10076 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10077 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10079 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10080 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10081 const struct builtin_description *d = &bdesc[fcode];
10082 enum insn_code icode = d->icode;
10083 int signature = d->signature;
10084 enum machine_mode tmode = VOIDmode;
10089 if (signature_args[signature][0])
10094 tmode = insn_data[icode].operand[0].mode;
10096 || GET_MODE (target) != tmode
10097 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10098 target = gen_reg_rtx (tmode);
10099 op[nop++] = target;
10104 for (i = 1; i <= 3; i++, nop++)
10107 enum machine_mode opmode, argmode;
10110 if (! signature_args[signature][i])
10112 arg = CALL_EXPR_ARG (exp, i - 1);
10113 if (arg == error_mark_node)
10115 if (signature_args[signature][i] & 8)
10118 optype = ptr_type_node;
10122 opmode = insn_data[icode].operand[nop].mode;
10123 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10125 argmode = TYPE_MODE (TREE_TYPE (arg));
10126 if (argmode != opmode)
10127 arg = build1 (NOP_EXPR, optype, arg);
10128 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
10129 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10130 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10136 pat = (*insn_data[d->icode].genfun) (op[0]);
10139 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10142 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10145 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10148 gcc_unreachable ();
10157 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10159 rtx sel0 = const0_rtx;
10160 rtx sel1 = const1_rtx;
10161 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10162 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10164 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10165 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10169 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10171 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10173 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10174 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
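/* Both expanders above decompose a V2SF operation into two SFmode
   operations, one per vector element; the sel operands and the _op0 /
   _op1 pattern variants pick out the element being computed.  */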
10177 /* Return the class of registers for which a mode change from FROM to TO
10178 is invalid. */
10180 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10181 enum reg_class class)
10183 /* We want to enable the use of SUBREGs as a means to
10184 VEC_SELECT a single element of a vector. */
10185 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10186 return (reg_classes_intersect_p (GENERAL_REGS, class));
10188 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10190 if (TARGET_LITTLE_ENDIAN)
10192 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10193 return reg_classes_intersect_p (DF_REGS, class);
10197 if (GET_MODE_SIZE (from) < 8)
10198 return reg_classes_intersect_p (DF_HI_REGS, class);
10205 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10206 that label is used. */
10209 sh_mark_label (rtx address, int nuses)
10211 if (GOTOFF_P (address))
10213 /* Extract the label or symbol. */
10214 address = XEXP (address, 0);
10215 if (GET_CODE (address) == PLUS)
10216 address = XEXP (address, 0);
10217 address = XVECEXP (address, 0, 0);
10219 if (GET_CODE (address) == LABEL_REF
10220 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10221 LABEL_NUSES (XEXP (address, 0)) += nuses;
10224 /* Compute extra cost of moving data between one register class
10225 and another. */
10227 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10228 uses this information. Hence, the general register <-> floating point
10229 register information here is not used for SFmode. */
10232 sh_register_move_cost (enum machine_mode mode,
10233 enum reg_class srcclass, enum reg_class dstclass)
10235 if (dstclass == T_REGS || dstclass == PR_REGS)
10238 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10241 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10242 && REGCLASS_HAS_FP_REG (srcclass)
10243 && REGCLASS_HAS_FP_REG (dstclass))
10246 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10247 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10249 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10250 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10253 if ((REGCLASS_HAS_FP_REG (dstclass)
10254 && REGCLASS_HAS_GENERAL_REG (srcclass))
10255 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10256 && REGCLASS_HAS_FP_REG (srcclass)))
10257 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10258 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10260 if ((dstclass == FPUL_REGS
10261 && REGCLASS_HAS_GENERAL_REG (srcclass))
10262 || (srcclass == FPUL_REGS
10263 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10266 if ((dstclass == FPUL_REGS
10267 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10268 || (srcclass == FPUL_REGS
10269 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10272 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10273 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10276 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10278 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10280 if (sh_gettrcost >= 0)
10281 return sh_gettrcost;
10282 else if (!TARGET_PT_FIXED)
10286 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10287 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10292 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10293 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10294 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10296 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10299 static rtx emit_load_ptr (rtx, rtx);
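/* Load a pointer-sized value from ADDR into REG. When Pmode is wider
   than ptr_mode (64-bit SHmedia with 32-bit pointers), the loaded value
   is sign-extended to Pmode.  */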
10302 emit_load_ptr (rtx reg, rtx addr)
10304 rtx mem = gen_const_mem (ptr_mode, addr);
10306 if (Pmode != ptr_mode)
10307 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10308 return emit_move_insn (reg, mem);
10312 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10313 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10316 CUMULATIVE_ARGS cum;
10317 int structure_value_byref = 0;
10318 rtx this, this_value, sibcall, insns, funexp;
10319 tree funtype = TREE_TYPE (function);
10320 int simple_add = CONST_OK_FOR_ADD (delta);
10322 rtx scratch0, scratch1, scratch2;
10325 reload_completed = 1;
10326 epilogue_completed = 1;
10327 current_function_uses_only_leaf_regs = 1;
10329 emit_note (NOTE_INSN_PROLOGUE_END);
10331 /* Find the "this" pointer. We have such a wide range of ABIs for the
10332 SH that it's best to do this completely machine independently.
10333 "this" is passed as first argument, unless a structure return pointer
10334 comes first, in which case "this" comes second. */
10335 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10336 #ifndef PCC_STATIC_STRUCT_RETURN
10337 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10338 structure_value_byref = 1;
10339 #endif /* not PCC_STATIC_STRUCT_RETURN */
10340 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10342 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10344 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10346 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10348 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10349 static chain pointer (even if you can't have nested virtual functions
10350 right now, someone might implement them sometime), and the rest of the
10351 registers are used for argument passing, are callee-saved, or reserved. */
10352 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10353 -ffixed-reg has been used. */
10354 if (! call_used_regs[0] || fixed_regs[0])
10355 error ("r0 needs to be available as a call-clobbered register");
10356 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10359 if (call_used_regs[1] && ! fixed_regs[1])
10360 scratch1 = gen_rtx_REG (ptr_mode, 1);
10361 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10362 pointing to where struct values are to be returned. */
10363 if (call_used_regs[3] && ! fixed_regs[3])
10364 scratch2 = gen_rtx_REG (Pmode, 3);
10366 else if (TARGET_SHMEDIA)
10368 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10369 if (i != REGNO (scratch0) &&
10370 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10372 scratch1 = gen_rtx_REG (ptr_mode, i);
10375 if (scratch1 == scratch0)
10376 error ("Need a second call-clobbered general purpose register");
10377 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10378 if (call_used_regs[i] && ! fixed_regs[i])
10380 scratch2 = gen_rtx_REG (Pmode, i);
10383 if (scratch2 == scratch0)
10384 error ("Need a call-clobbered target register");
10387 this_value = plus_constant (this, delta);
10389 && (simple_add || scratch0 != scratch1)
10390 && strict_memory_address_p (ptr_mode, this_value))
10392 emit_load_ptr (scratch0, this_value);
10397 ; /* Do nothing. */
10398 else if (simple_add)
10399 emit_move_insn (this, this_value);
10402 emit_move_insn (scratch1, GEN_INT (delta));
10403 emit_insn (gen_add2_insn (this, scratch1));
10411 emit_load_ptr (scratch0, this);
10413 offset_addr = plus_constant (scratch0, vcall_offset);
10414 if (strict_memory_address_p (ptr_mode, offset_addr))
10415 ; /* Do nothing. */
10416 else if (! TARGET_SH5 && scratch0 != scratch1)
10418 /* scratch0 != scratch1, and we have indexed loads. Get a better
10419 schedule by loading the offset into r1 and using an indexed
10420 load - then the load of r1 can issue before the load from
10421 (this + delta) finishes. */
10422 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10423 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10425 else if (CONST_OK_FOR_ADD (vcall_offset))
10427 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10428 offset_addr = scratch0;
10430 else if (scratch0 != scratch1)
10432 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10433 emit_insn (gen_add2_insn (scratch0, scratch1));
10434 offset_addr = scratch0;
10437 gcc_unreachable (); /* FIXME */
10438 emit_load_ptr (scratch0, offset_addr);
10440 if (Pmode != ptr_mode)
10441 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10442 emit_insn (gen_add2_insn (this, scratch0));
10445 /* Generate a tail call to the target function. */
10446 if (! TREE_USED (function))
10448 assemble_external (function);
10449 TREE_USED (function) = 1;
10451 funexp = XEXP (DECL_RTL (function), 0);
10452 /* If the function is overridden, so is the thunk, hence we don't
10453 need GOT addressing even if this is a public symbol. */
10455 if (TARGET_SH1 && ! flag_weak)
10456 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10459 if (TARGET_SH2 && flag_pic)
10461 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10462 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10466 if (TARGET_SHMEDIA && flag_pic)
10468 funexp = gen_sym2PIC (funexp);
10469 PUT_MODE (funexp, Pmode);
10471 emit_move_insn (scratch2, funexp);
10472 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10473 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10475 sibcall = emit_call_insn (sibcall);
10476 SIBLING_CALL_P (sibcall) = 1;
10477 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10480 /* Run just enough of rest_of_compilation to do scheduling and get
10481 the insns emitted. Note that use_thunk calls
10482 assemble_start_function and assemble_end_function. */
10484 insn_locators_alloc ();
10485 insns = get_insns ();
10490 /* Initialize the bitmap obstacks. */
10491 bitmap_obstack_initialize (NULL);
10492 bitmap_obstack_initialize (&reg_obstack);
10495 rtl_register_cfg_hooks ();
10496 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10497 init_rtl_bb_info (EXIT_BLOCK_PTR);
10498 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10499 EXIT_BLOCK_PTR->flags |= BB_RTL;
10500 find_basic_blocks (insns);
10502 if (flag_schedule_insns_after_reload)
10504 life_analysis (PROP_FINAL);
10506 split_all_insns (1);
10510 /* We must split the jmp insn in the PIC case. */
10512 split_all_insns_noflow ();
10519 split_all_insns_noflow ();
10525 if (optimize > 0 && flag_delayed_branch)
10526 dbr_schedule (insns);
10528 shorten_branches (insns);
10529 final_start_function (insns, file, 1);
10530 final (insns, file, 1);
10531 final_end_function ();
10532 free_after_compilation (cfun);
10534 reload_completed = 0;
10535 epilogue_completed = 0;
10539 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10543 /* If this is not an ordinary function, the name usually comes from a
10544 string literal or an sprintf buffer. Make sure we use the same
10545 string consistently, so that cse will be able to unify address loads. */
10546 if (kind != FUNCTION_ORDINARY)
10547 name = IDENTIFIER_POINTER (get_identifier (name));
10548 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10549 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10553 case FUNCTION_ORDINARY:
10557 rtx reg = target ? target : gen_reg_rtx (Pmode);
10559 emit_insn (gen_symGOT2reg (reg, sym));
10565 /* ??? To allow cse to work, we use GOTOFF relocations.
10566 We could add combiner patterns to transform this into
10567 straight pc-relative calls with sym2PIC / bsrf when
10568 label load and function call are still 1:1 and in the
10569 same basic block during combine. */
10570 rtx reg = target ? target : gen_reg_rtx (Pmode);
10572 emit_insn (gen_symGOTOFF2reg (reg, sym));
10577 if (target && sym != target)
10579 emit_move_insn (target, sym);
10585 /* Find the number of a general purpose register in S. */
10587 scavenge_reg (HARD_REG_SET *s)
10590 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10591 if (TEST_HARD_REG_BIT (*s, r))
10597 sh_get_pr_initial_val (void)
10601 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10602 PR register on SHcompact, because it might be clobbered by the prologue.
10603 We check first if that is known to be the case. */
10604 if (TARGET_SHCOMPACT
10605 && ((crtl->args.info.call_cookie
10606 & ~ CALL_COOKIE_RET_TRAMP (1))
10607 || current_function_saves_all_registers))
10608 return gen_frame_mem (SImode, return_address_pointer_rtx);
10610 /* If we haven't finished rtl generation, there might be a nonlocal label
10611 that we haven't seen yet.
10612 ??? get_hard_reg_initial_val fails if it is called after register
10613 allocation has started, unless it has been called before for the
10614 same register. And even then, we end up in trouble if we didn't use
10615 the register in the same basic block before. So call
10616 get_hard_reg_initial_val now and wrap it in an unspec if we might
10617 need to replace it. */
10618 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10619 combine can put the pseudo returned by get_hard_reg_initial_val into
10620 instructions that need a general purpose register, which will fail to
10621 be recognized when the pseudo becomes allocated to PR. */
10623 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10625 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10630 sh_expand_t_scc (enum rtx_code code, rtx target)
10632 rtx result = target;
10635 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10636 || GET_CODE (sh_compare_op1) != CONST_INT)
10638 if (GET_CODE (result) != REG)
10639 result = gen_reg_rtx (SImode);
10640 val = INTVAL (sh_compare_op1);
10641 if ((code == EQ && val == 1) || (code == NE && val == 0))
10642 emit_insn (gen_movt (result));
10643 else if (TARGET_SH2A && ((code == EQ && val == 0)
10644 || (code == NE && val == 1)))
10645 emit_insn (gen_movrt (result));
10646 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10648 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10649 emit_insn (gen_subc (result, result, result));
10650 emit_insn (gen_addsi3 (result, result, const1_rtx));
10652 else if (code == EQ || code == NE)
10653 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10656 if (result != target)
10657 emit_move_insn (target, result);
10661 /* INSN is an sfunc; return the rtx that describes the address used. */
10663 extract_sfunc_addr (rtx insn)
10665 rtx pattern, part = NULL_RTX;
10668 pattern = PATTERN (insn);
10669 len = XVECLEN (pattern, 0);
10670 for (i = 0; i < len; i++)
10672 part = XVECEXP (pattern, 0, i);
10673 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10674 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10675 return XEXP (part, 0);
10677 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10678 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10681 /* Verify that the register in use_sfunc_addr still agrees with the address
10682 used in the sfunc. This prevents fill_slots_from_thread from changing
10683 use_sfunc_addr.
10684 INSN is the use_sfunc_addr instruction, and REG is the register it
10685 guards. */
10687 check_use_sfunc_addr (rtx insn, rtx reg)
10689 /* Search for the sfunc. It should really come right after INSN. */
10690 while ((insn = NEXT_INSN (insn)))
10692 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10694 if (! INSN_P (insn))
10697 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10698 insn = XVECEXP (PATTERN (insn), 0, 0);
10699 if (GET_CODE (PATTERN (insn)) != PARALLEL
10700 || get_attr_type (insn) != TYPE_SFUNC)
10702 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10704 gcc_unreachable ();
10707 /* This function returns a constant rtx that represents 2**15 / pi in
10708 SFmode. It's used to scale SFmode angles, in radians, to a
10709 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10710 maps to 0x10000). */
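/* Worked out: 2**15 / pi = 32768 / 3.14159... = 10430.378350470453, so
   multiplying an angle A in radians by this constant yields
   A * 0x10000 / (2*pi), the fixed-point angle fsca expects;
   sh_fsca_int2sf below holds the reciprocal, pi / 2**15
   = 9.587379924285257e-5, for the inverse scaling.  */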
10712 static GTY(()) rtx sh_fsca_sf2int_rtx;
10715 sh_fsca_sf2int (void)
10717 if (! sh_fsca_sf2int_rtx)
10719 REAL_VALUE_TYPE rv;
10721 real_from_string (&rv, "10430.378350470453");
10722 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10725 return sh_fsca_sf2int_rtx;
10728 /* This function returns a constant rtx that represents 2**15 / pi in
10729 DFmode. It's used to scale DFmode angles, in radians, to a
10730 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10731 maps to 0x10000). */
10733 static GTY(()) rtx sh_fsca_df2int_rtx;
10736 sh_fsca_df2int (void)
10738 if (! sh_fsca_df2int_rtx)
10740 REAL_VALUE_TYPE rv;
10742 real_from_string (&rv, "10430.378350470453");
10743 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10746 return sh_fsca_df2int_rtx;
10749 /* This function returns a constant rtx that represents pi / 2**15 in
10750 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10751 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10752 2*pi. */
10754 static GTY(()) rtx sh_fsca_int2sf_rtx;
10757 sh_fsca_int2sf (void)
10759 if (! sh_fsca_int2sf_rtx)
10761 REAL_VALUE_TYPE rv;
10763 real_from_string (&rv, "9.587379924285257e-5");
10764 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10767 return sh_fsca_int2sf_rtx;
/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}
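
/* Editorial summary (not from the original sources): in both branches the
   argument of CALL_COOKIE_RET_TRAMP is nonzero only for SHcompact calls
   whose result is wider than 4 bytes yet returned in FIRST_RET_REG, so
   the cookie flags exactly those calls for return-trampoline handling in
   the call expanders; for all other calls it evaluates to 0.  */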
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */

rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops when we would change a MEM inside a
     CONST_DOUBLE into the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2]
	&& GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
	{
	  new = replace_n_hard_rtx (XEXP (x, i), replacements,
				    n_replacements, modify);
	  if (!new)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
				      n_replacements, modify);
	    if (!new)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new;
	  }
    }

  return x;
}
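
/* Editorial usage sketch (not from the original sources): with
     rtx repl[4] = { from0, to0, from1, to1 };
   replace_n_hard_rtx (x, repl, 2, 0) merely checks that every FROM(n) in
   X can be replaced unambiguously, while passing MODIFY = 1 rewrites X in
   place.  Unlike plain replace_rtx, a hard register is also replaced
   where X mentions it as part of a wider multi-register value, with the
   register number offset accordingly.  */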
/* Build an rtx of mode MODE equivalent to truncating X, looking through
   an outer ZERO_EXTEND or SIGN_EXTEND.  If the extension's operand is
   narrower than MODE, the extension itself is reused instead of a
   TRUNCATE; with NEED_SIGN_EXT nonzero that is only done for
   SIGN_EXTEND.  */

rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}
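
/* Editorial example (not from the original sources): for
   X = (sign_extend:DI (reg:HI r)), sh_gen_truncate (HImode, X, 1) returns
   (reg:HI r) directly, while truncating the same X to SImode yields
   (sign_extend:SI (reg:HI r)), since HImode is narrower than SImode and
   the sign extension can be reused.  */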
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}
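
/* Editorial note (not from the original sources): an SHmedia hard
   register holds at most 8 bytes, so a truncate of a wider value, e.g.
   (truncate:SI (reg:TI r)), is rewritten to truncate only the DImode
   lowpart of the register pair; returning -1 tells for_each_rtx not to
   descend into the subexpression that was just rewritten.  */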
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}
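
/* Editorial note (not from the original sources): for_each_rtx stops as
   soon as its callback returns a positive value and propagates that
   value, so sh_contains_memref_p returns nonzero as soon as the first
   MEM is found, without walking the rest of the pattern.  */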
/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return 1;
    }

  return 0;
}
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
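
/* Editorial note (not from the original sources): ptabs moves an address
   into a PDImode target-branch register and may trap on an invalid
   pointer.  Emitting gen_ptabs into a fresh register here, rather than
   leaving the conversion implicit in the call pattern, is what makes that
   potential trap visible to the rest of the compiler, as the comment
   above describes.  */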
/* Implement TARGET_SECONDARY_RELOAD.  Return the class of an intermediate
   register needed to (re)load X into a register of class CLASS, or set
   SRI->icode to a reload pattern and return NO_REGS.  */

static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (class == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (class == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && class == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (class)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;